From: no author Date: Fri, 26 Jul 2002 11:34:39 +0000 (+0000) Subject: This commit was manufactured by cvs2svn to create branch X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=5b579c5e34d0ee1f35a88898dbb1c307aeb73829;p=thirdparty%2Fvalgrind.git This commit was manufactured by cvs2svn to create branch 'VALGRIND_1_0_BRANCH'. git-svn-id: svn://svn.valgrind.org/valgrind/branches/VALGRIND_1_0_BRANCH@543 --- diff --git a/addrcheck/Makefile.am b/addrcheck/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/addrcheck/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c 
\ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/cachegrind/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . 
docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) 
-fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/cachegrind/cg_annotate.in b/cachegrind/cg_annotate.in deleted file mode 100644 index 11821901c2..0000000000 --- a/cachegrind/cg_annotate.in +++ /dev/null @@ -1,893 +0,0 @@ -#! /usr/bin/perl -w -##--------------------------------------------------------------------## -##--- The cache simulation framework: instrumentation, recording ---## -##--- and results printing. ---## -##--- vg_annotate ---## -##--------------------------------------------------------------------## - -# This file is part of Valgrind, an x86 protected-mode emulator -# designed for debugging and profiling binaries on x86-Unixes. -# -# Copyright (C) 2002 Nicholas Nethercote -# njn25@cam.ac.uk -# -# This program is free software; you can redistribute it and/or -# modify it under the terms of the GNU General Public License as -# published by the Free Software Foundation; either version 2 of the -# License, or (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, but -# WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA -# 02111-1307, USA. -# -# The GNU General Public License is contained in the file LICENSE. - -#---------------------------------------------------------------------------- -# Annotator for cachegrind. -# -# File format is described in /docs/techdocs.html. -# -# Performance improvements record, using cachegrind.out for cacheprof, doing no -# source annotation (irrelevant ones removed): -# user time -# 1. turned off warnings in add_hash_a_to_b() 3.81 --> 3.48s -# [now add_array_a_to_b()] -# 6. make line_to_CC() return a ref instead of a hash 3.01 --> 2.77s -# -#10. changed file format to avoid file/fn name repetition 2.40s -# (not sure why higher; maybe due to new '.' entries?) -#11. changed file format to drop unnecessary end-line "."s 2.36s -# (shrunk file by about 37%) -#12. switched from hash CCs to array CCs 1.61s -#13. only adding b[i] to a[i] if b[i] defined (was doing it if -# either a[i] or b[i] was defined, but if b[i] was undefined -# it just added 0) 1.48s -#14. Stopped converting "." entries to undef and then back 1.16s -#15. Using foreach $i (x..y) instead of for ($i = 0...) in -# add_array_a_to_b() 1.11s -# -# Auto-annotating primes: -#16. 
Finding count lengths by int((length-1)/3), not by -# commifying (halves the number of commify calls) 1.68s --> 1.47s - -use strict; - -#---------------------------------------------------------------------------- -# Overview: the running example in the comments is for: -# - events = A,B,C,D -# - --show=C,A,D -# - --sort=D,C -#---------------------------------------------------------------------------- - -#---------------------------------------------------------------------------- -# Global variables, main data structures -#---------------------------------------------------------------------------- -# CCs are arrays, the counts corresponding to @events, with 'undef' -# representing '.'. This makes things fast (faster than using hashes for CCs) -# but we have to use @sort_order and @show_order below to handle the --sort and -# --show options, which is a bit tricky. -#---------------------------------------------------------------------------- - -# Total counts for summary (an array reference). -my $summary_CC; - -# Totals for each function, for overall summary. -# hash(filename:fn_name => CC array) -my %fn_totals; - -# Individual CCs, organised by filename and line_num for easy annotation. -# hash(filename => hash(line_num => CC array)) -my %all_ind_CCs; - -# Files chosen for annotation on the command line. -# key = basename (trimmed of any directory), value = full filename -my %user_ann_files; - -# Generic description string. -my $desc = ""; - -# Command line of profiled program. -my $cmd; - -# Events in input file, eg. (A,B,C,D) -my @events; - -# Events to show, from command line, eg. (C,A,D) -my @show_events; - -# Map from @show_events indices to @events indices, eg. (2,0,3). Gives the -# order in which we must traverse @events in order to show the @show_events, -# eg. (@events[$show_order[1]], @events[$show_order[2]]...) = @show_events. -# (Might help to think of it like a hash (0 => 2, 1 => 0, 2 => 3).) 
-my @show_order; - -# Print out the function totals sorted by these events, eg. (D,C). -my @sort_events; - -# Map from @sort_events indices to @events indices, eg. (3,2). Same idea as -# for @show_order. -my @sort_order; - -# Thresholds, one for each sort event (or default to 1 if no sort events -# specified). We print out functions and do auto-annotations until we've -# handled this proportion of all the events thresholded. -my @thresholds; - -my $default_threshold = 99; - -my $single_threshold = $default_threshold; - -# If on, automatically annotates all files that are involved in getting over -# all the threshold counts. -my $auto_annotate = 0; - -# Number of lines to show around each annotated line. -my $context = 8; - -# Directories in which to look for annotation files. -my @include_dirs = (""); - -# Input file name -my $input_file = "cachegrind.out"; - -# Version number -my $version = "@VERSION@"; - -# Usage message. -my $usage = < percentage of counts (of primary sort event) we - are interested in [$default_threshold%] - --auto=yes|no annotate all source files containing functions - that helped reach the event count threshold [no] - --context=N print N lines of context before and after - annotated lines [8] - -I --include= add to list of directories to search for - source files - - Valgrind is Copyright (C) 2000-2002 Julian Seward - and licensed under the GNU General Public License, version 2. - Bug reports, feedback, admiration, abuse, etc, to: jseward\@acm.org. - -END -; - -# Used in various places of output. -my $fancy = '-' x 80 . 
"\n"; - -#----------------------------------------------------------------------------- -# Argument and option handling -#----------------------------------------------------------------------------- -sub process_cmd_line() -{ - for my $arg (@ARGV) { - - # Option handling - if ($arg =~ /^-/) { - - # --version - if ($arg =~ /^-v$|^--version$/) { - die("vg_annotate-$version\n"); - - # --show=A,B,C - } elsif ($arg =~ /^--show=(.*)$/) { - @show_events = split(/,/, $1); - - # --sort=A,B,C - } elsif ($arg =~ /^--sort=(.*)$/) { - @sort_events = split(/,/, $1); - foreach my $i (0 .. scalar @sort_events - 1) { - if ($sort_events[$i] =~#/.*:(\d+)$/) { - /.*:([\d\.]+)%?$/) { - my $th = $1; - ($th >= 0 && $th <= 100) or die($usage); - $sort_events[$i] =~ s/:.*//; - $thresholds[$i] = $th; - } else { - $thresholds[$i] = 0; - } - } - - # --threshold=X (tolerates a trailing '%') - } elsif ($arg =~ /^--threshold=([\d\.]+)%?$/) { - $single_threshold = $1; - ($1 >= 0 && $1 <= 100) or die($usage); - - # --auto=yes|no - } elsif ($arg =~ /^--auto=(yes|no)$/) { - $auto_annotate = 1 if ($1 eq "yes"); - $auto_annotate = 0 if ($1 eq "no"); - - # --context=N - } elsif ($arg =~ /^--context=([\d\.]+)$/) { - $context = $1; - if ($context < 0) { - die($usage); - } - - # --include=A,B,C - } elsif ($arg =~ /^(-I|--include)=(.*)$/) { - my $inc = $2; - $inc =~ s|/$||; # trim trailing '/' - push(@include_dirs, "$inc/"); - - } else { # -h and --help fall under this case - die($usage); - } - - # Argument handling -- annotation file checking and selection. - # Stick filenames into a hash for quick 'n easy lookup throughout - } else { - my $readable = 0; - foreach my $include_dir (@include_dirs) { - if (-r $include_dir . 
$arg) { - $readable = 1; - } - } - $readable or die("File $arg not found in any of: @include_dirs\n"); - $user_ann_files{$arg} = 1; - } - } -} - -#----------------------------------------------------------------------------- -# Reading of input file -#----------------------------------------------------------------------------- -sub max ($$) -{ - my ($x, $y) = @_; - return ($x > $y ? $x : $y); -} - -# Add the two arrays; any '.' entries are ignored. Two tricky things: -# 1. If $a2->[$i] is undefined, it defaults to 0 which is what we want; we turn -# off warnings to allow this. This makes things about 10% faster than -# checking for definedness ourselves. -# 2. We don't add an undefined count or a ".", even though it's value is 0, -# because we don't want to make an $a2->[$i] that is undef become 0 -# unnecessarily. -sub add_array_a_to_b ($$) -{ - my ($a1, $a2) = @_; - - my $n = max(scalar @$a1, scalar @$a2); - $^W = 0; - foreach my $i (0 .. $n-1) { - $a2->[$i] += $a1->[$i] if (defined $a1->[$i] && "." ne $a1->[$i]); - } - $^W = 1; -} - -# Add each event count to the CC array. '.' counts become undef, as do -# missing entries (implicitly). -sub line_to_CC ($) -{ - my @CC = (split /\s+/, $_[0]); - (@CC <= @events) or die("Line $.: too many event counts\n"); - return \@CC; -} - -sub read_input_file() -{ - open(INPUTFILE, "< $input_file") || die "File $input_file not opened\n"; - - # Read "desc:" lines. - my $line; - # This gives a "uninitialized value in substitution (s///)" warning; hmm... - #while ($line = && $line =~ s/desc:\s+//) { - # $desc .= "$line\n"; - #} - while (1) { - $line = ; - if ($line =~ s/desc:\s+//) { - $desc .= $line; - } else { - last; - } - } - - # Read "cmd:" line (Nb: will already be in $line from "desc:" loop above). - ($line =~ s/cmd:\s+//) or die("Line $.: missing command line\n"); - $cmd = $line; - chomp($cmd); # Remove newline - - # Read "events:" line. 
We make a temporary hash in which the Nth event's - # value is N, which is useful for handling --show/--sort options below. - $line = ; - ($line =~ s/events:\s+//) or die("Line $.: missing events line\n"); - @events = split(/\s+/, $line); - my %events; - my $n = 0; - foreach my $event (@events) { - $events{$event} = $n; - $n++ - } - - # If no --show arg give, default to showing all events in the file. - # If --show option is used, check all specified events appeared in the - # "events:" line. Then initialise @show_order. - if (@show_events) { - foreach my $show_event (@show_events) { - (defined $events{$show_event}) or - die("--show event `$show_event' did not appear in input\n"); - } - } else { - @show_events = @events; - } - foreach my $show_event (@show_events) { - push(@show_order, $events{$show_event}); - } - - # Do as for --show, but if no --sort arg given, default to sorting by - # column order (ie. first column event is primary sort key, 2nd column is - # 2ndary key, etc). - if (@sort_events) { - foreach my $sort_event (@sort_events) { - (defined $events{$sort_event}) or - die("--sort event `$sort_event' did not appear in input\n"); - } - } else { - @sort_events = @events; - } - foreach my $sort_event (@sort_events) { - push(@sort_order, $events{$sort_event}); - } - - # If multiple threshold args weren't given via --sort, stick in the single - # threshold (either from --threshold if used, or the default otherwise) for - # the primary sort event, and 0% for the rest. - if (not @thresholds) { - foreach my $e (@sort_order) { - push(@thresholds, 0); - } - $thresholds[0] = $single_threshold; - } - - my $curr_file; - my $curr_fn; - my $curr_name; - - my $curr_fn_CC = []; - my $curr_file_ind_CCs = {}; # hash(line_num => CC) - - # Read body of input file. - while () { - s/#.*$//; # remove comments - if (s/^(\d+)\s+//) { - my $line_num = $1; - my $CC = line_to_CC($_); - add_array_a_to_b($CC, $curr_fn_CC); - - # If curr_file is selected, add CC to curr_file list. 
We look for - # full filename matches; or, if auto-annotating, we have to - # remember everything -- we won't know until the end what's needed. - if ($auto_annotate || defined $user_ann_files{$curr_file}) { - my $tmp = $curr_file_ind_CCs->{$line_num}; - $tmp = [] unless defined $tmp; - add_array_a_to_b($CC, $tmp); - $curr_file_ind_CCs->{$line_num} = $tmp; - } - - } elsif (s/^fn=(.*)$//) { - # Commit result from previous function - $fn_totals{$curr_name} = $curr_fn_CC if (defined $curr_name); - - # Setup new one - $curr_fn = $1; - $curr_name = "$curr_file:$curr_fn"; - $curr_fn_CC = $fn_totals{$curr_name}; - $curr_fn_CC = [] unless (defined $curr_fn_CC); - - } elsif (s/^fl=(.*)$//) { - $all_ind_CCs{$curr_file} = $curr_file_ind_CCs - if (defined $curr_file); - - $curr_file = $1; - $curr_file_ind_CCs = $all_ind_CCs{$curr_file}; - $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs); - - } elsif (s/^(fi|fe)=(.*)$//) { - (defined $curr_name) or die("Line $.: Unexpected fi/fe line\n"); - $fn_totals{$curr_name} = $curr_fn_CC; - $all_ind_CCs{$curr_file} = $curr_file_ind_CCs; - - $curr_file = $2; - $curr_name = "$curr_file:$curr_fn"; - $curr_file_ind_CCs = $all_ind_CCs{$curr_file}; - $curr_file_ind_CCs = {} unless (defined $curr_file_ind_CCs); - $curr_fn_CC = $fn_totals{$curr_name}; - $curr_fn_CC = [] unless (defined $curr_fn_CC); - - } elsif (s/^\s*$//) { - # blank, do nothing - - } elsif (s/^summary:\s+//) { - # Finish up handling final filename/fn_name counts - $fn_totals{"$curr_file:$curr_fn"} = $curr_fn_CC - if (defined $curr_file && defined $curr_fn); - $all_ind_CCs{$curr_file} = - $curr_file_ind_CCs if (defined $curr_file); - - $summary_CC = line_to_CC($_); - (scalar(@$summary_CC) == @events) - or die("Line $.: summary event and total event mismatch\n"); - - } else { - warn("WARNING: line $. 
malformed, ignoring\n"); - } - } - - # Check if summary line was present - if (not defined $summary_CC) { - warn("WARNING: missing final summary line, no summary will be printed\n"); - } - - close(INPUTFILE); -} - -#----------------------------------------------------------------------------- -# Print options used -#----------------------------------------------------------------------------- -sub print_options () -{ - print($fancy); - print($desc); - print("Command: $cmd\n"); - print("Events recorded: @events\n"); - print("Events shown: @show_events\n"); - print("Event sort order: @sort_events\n"); - print("Thresholds: @thresholds\n"); - - my @include_dirs2 = @include_dirs; # copy @include_dirs - shift(@include_dirs2); # remove "" entry, which is always the first - unshift(@include_dirs2, "") if (0 == @include_dirs2); - my $include_dir = shift(@include_dirs2); - print("Include dirs: $include_dir\n"); - foreach my $include_dir (@include_dirs2) { - print(" $include_dir\n"); - } - - my @user_ann_files = keys %user_ann_files; - unshift(@user_ann_files, "") if (0 == @user_ann_files); - my $user_ann_file = shift(@user_ann_files); - print("User annotated: $user_ann_file\n"); - foreach $user_ann_file (@user_ann_files) { - print(" $user_ann_file\n"); - } - - my $is_on = ($auto_annotate ? "on" : "off"); - print("Auto-annotation: $is_on\n"); - print("\n"); -} - -#----------------------------------------------------------------------------- -# Print summary and sorted function totals -#----------------------------------------------------------------------------- -sub mycmp ($$) -{ - my ($c, $d) = @_; - - # Iterate through sort events (eg. 
3,2); return result if two are different - foreach my $i (@sort_order) { - my ($x, $y); - $x = $c->[$i]; - $y = $d->[$i]; - $x = -1 unless defined $x; - $y = -1 unless defined $y; - - my $cmp = $y <=> $x; # reverse sort - if (0 != $cmp) { - return $cmp; - } - } - # Exhausted events, equal - return 0; -} - -sub commify ($) { - my ($val) = @_; - 1 while ($val =~ s/^(\d+)(\d{3})/$1,$2/); - return $val; -} - -# Because the counts can get very big, and we don't want to waste screen space -# and make lines too long, we compute exactly how wide each column needs to be -# by finding the widest entry for each one. -sub compute_CC_col_widths (@) -{ - my @CCs = @_; - my $CC_col_widths = []; - - # Initialise with minimum widths (from event names) - foreach my $event (@events) { - push(@$CC_col_widths, length($event)); - } - - # Find maximum width count for each column. @CC_col_width positions - # correspond to @CC positions. - foreach my $CC (@CCs) { - foreach my $i (0 .. scalar(@$CC)-1) { - if (defined $CC->[$i]) { - # Find length, accounting for commas that will be added - my $length = length $CC->[$i]; - my $clength = $length + int(($length - 1) / 3); - $CC_col_widths->[$i] = max($CC_col_widths->[$i], $clength); - } - } - } - return $CC_col_widths; -} - -# Print the CC with each column's size dictated by $CC_col_widths. -sub print_CC ($$) -{ - my ($CC, $CC_col_widths) = @_; - - foreach my $i (@show_order) { - my $count = (defined $CC->[$i] ? 
commify($CC->[$i]) : "."); - my $space = ' ' x ($CC_col_widths->[$i] - length($count)); - print("$space$count "); - } -} - -sub print_events ($) -{ - my ($CC_col_widths) = @_; - - foreach my $i (@show_order) { - my $event = $events[$i]; - my $event_width = length($event); - my $col_width = $CC_col_widths->[$i]; - my $space = ' ' x ($col_width - $event_width); - print("$space$event "); - } -} - -# Prints summary and function totals (with separate column widths, so that -# function names aren't pushed over unnecessarily by huge summary figures). -# Also returns a hash containing all the files that are involved in getting the -# events count above the thresholds (ie. all the interesting ones). -sub print_summary_and_fn_totals () -{ - my @fn_fullnames = keys %fn_totals; - - # Work out the size of each column for printing (summary and functions - # separately). - my $summary_CC_col_widths = compute_CC_col_widths($summary_CC); - my $fn_CC_col_widths = compute_CC_col_widths(values %fn_totals); - - # Header and counts for summary - print($fancy); - print_events($summary_CC_col_widths); - print("\n"); - print($fancy); - print_CC($summary_CC, $summary_CC_col_widths); - print(" PROGRAM TOTALS\n"); - print("\n"); - - # Header for functions - print($fancy); - print_events($fn_CC_col_widths); - print(" file:function\n"); - print($fancy); - - # Sort function names into order dictated by --sort option. - @fn_fullnames = sort { - mycmp($fn_totals{$a}, $fn_totals{$b}) - } @fn_fullnames; - - - # Assertion - (scalar @sort_order == scalar @thresholds) or - die("sort_order length != thresholds length:\n", - " @sort_order\n @thresholds\n"); - - my $threshold_files = {}; - # @curr_totals has the same shape as @sort_order and @thresholds - my @curr_totals = (); - foreach my $e (@thresholds) { - push(@curr_totals, 0); - } - - # Print functions, stopping when the threshold has been reached. 
- foreach my $fn_name (@fn_fullnames) { - - # Stop when we've reached all the thresholds - my $reached_all_thresholds = 1; - foreach my $i (0 .. scalar @thresholds - 1) { - my $prop = $curr_totals[$i] * 100 / $summary_CC->[$sort_order[$i]]; - $reached_all_thresholds &= ($prop >= $thresholds[$i]); - } - last if $reached_all_thresholds; - - # Print function results - my $fn_CC = $fn_totals{$fn_name}; - print_CC($fn_CC, $fn_CC_col_widths); - print(" $fn_name\n"); - - # Update the threshold counts - my $filename = $fn_name; - $filename =~ s/:.+$//; # remove function name - $threshold_files->{$filename} = 1; - foreach my $i (0 .. scalar @sort_order - 1) { - $curr_totals[$i] += $fn_CC->[$sort_order[$i]] - if (defined $fn_CC->[$sort_order[$i]]); - } - } - print("\n"); - - return $threshold_files; -} - -#----------------------------------------------------------------------------- -# Annotate selected files -#----------------------------------------------------------------------------- - -# Issue a warning that the source file is more recent than the input file. -sub warning_on_src_more_recent_than_inputfile ($) -{ - my $src_file = $_[0]; - - my $warning = <{"???"}; - %all_ann_files = (%user_ann_files, %$threshold_files) - } else { - %all_ann_files = %user_ann_files; - } - - # Track if we did any annotations. - my $did_annotations = 0; - - LOOP: - foreach my $src_file (keys %all_ann_files) { - - my $opened_file = ""; - my $full_file_name = ""; - foreach my $include_dir (@include_dirs) { - my $try_name = $include_dir . $src_file; - if (open(INPUTFILE, "< $try_name")) { - $opened_file = $try_name; - $full_file_name = ($include_dir eq "" - ? $src_file - : "$include_dir + $src_file"); - last; - } - } - - if (not $opened_file) { - # Failed to open the file. If chosen on the command line, die. - # If arose from auto-annotation, print a little message. 
- if (defined $user_ann_files{$src_file}) { - die("File $src_file not opened in any of: @include_dirs\n"); - - } else { - push(@unfound_auto_annotate_files, $src_file); - } - - } else { - # File header (distinguish between user- and auto-selected files). - print("$fancy"); - my $ann_type = - (defined $user_ann_files{$src_file} ? "User" : "Auto"); - print("-- $ann_type-annotated source: $full_file_name\n"); - print("$fancy"); - - # Get file's CCs - my $src_file_CCs = $all_ind_CCs{$src_file}; - if (!defined $src_file_CCs) { - print(" No information has been collected for $src_file\n\n"); - next LOOP; - } - - $did_annotations = 1; - - # Numeric, not lexicographic sort! - my @line_nums = sort {$a <=> $b} keys %$src_file_CCs; - - # If $src_file more recent than cachegrind.out, issue warning - my $src_more_recent_than_inputfile = 0; - if ((stat $opened_file)[9] > (stat $input_file)[9]) { - $src_more_recent_than_inputfile = 1; - warning_on_src_more_recent_than_inputfile($src_file); - } - - # Work out the size of each column for printing - my $CC_col_widths = compute_CC_col_widths(values %$src_file_CCs); - - # Events header - print_events($CC_col_widths); - print("\n\n"); - - # Shift out 0 if it's in the line numbers (from unknown entries, - # likely due to bugs in Valgrind's stabs debug info reader) - shift(@line_nums) if (0 == $line_nums[0]); - - # Finds interesting line ranges -- all lines with a CC, and all - # lines within $context lines of a line with a CC. - my $n = @line_nums; - my @pairs; - for (my $i = 0; $i < $n; $i++) { - push(@pairs, $line_nums[$i] - $context); # lower marker - while ($i < $n-1 && - $line_nums[$i] + 2*$context >= $line_nums[$i+1]) { - $i++; - } - push(@pairs, $line_nums[$i] + $context); # upper marker - } - - # Annotate chosen lines, tracking total counts of lines printed - $pairs[0] = 1 if ($pairs[0] < 1); - while (@pairs) { - my $low = shift @pairs; - my $high = shift @pairs; - while ($. 
< $low-1) { - my $tmp = ; - last unless (defined $tmp); # hack to detect EOF - } - my $src_line; - # Print line number, unless start of file - print("-- line $low " . '-' x 40 . "\n") if ($low != 1); - while (($. < $high) && ($src_line = )) { - if (defined $line_nums[0] && $. == $line_nums[0]) { - print_CC($src_file_CCs->{$.}, $CC_col_widths); - add_array_a_to_b($src_file_CCs->{$.}, - $printed_totals_CC); - shift(@line_nums); - - } else { - print_CC( [], $CC_col_widths); - } - - print(" $src_line"); - } - # Print line number, unless EOF - if ($src_line) { - print("-- line $high " . '-' x 40 . "\n"); - } else { - last; - } - } - - # If there was info on lines past the end of the file... - if (@line_nums) { - foreach my $line_num (@line_nums) { - print_CC($src_file_CCs->{$line_num}, $CC_col_widths); - print(" \n"); - } - print("\n"); - warning_on_nonexistent_lines($src_more_recent_than_inputfile, - $src_file, \@line_nums); - } - print("\n"); - - # Print summary of counts attributed to file but not to any - # particular line (due to incomplete debug info). - if ($src_file_CCs->{0}) { - print_CC($src_file_CCs->{0}, $CC_col_widths); - print(" \n\n"); - } - - close(INPUTFILE); - } - } - - # Print list of unfound auto-annotate selected files. - if (@unfound_auto_annotate_files) { - print("$fancy"); - print("The following files chosen for auto-annotation could not be found:\n"); - print($fancy); - foreach my $f (@unfound_auto_annotate_files) { - print(" $f\n"); - } - print("\n"); - } - - # If we did any annotating, print what proportion of events were covered by - # annotated lines above. 
- if ($did_annotations) { - my $percent_printed_CC; - foreach (my $i = 0; $i < @$summary_CC; $i++) { - $percent_printed_CC->[$i] = - sprintf("%.0f", - $printed_totals_CC->[$i] / $summary_CC->[$i] * 100); - } - my $pp_CC_col_widths = compute_CC_col_widths($percent_printed_CC); - print($fancy); - print_events($pp_CC_col_widths); - print("\n"); - print($fancy); - print_CC($percent_printed_CC, $pp_CC_col_widths); - print(" percentage of events annotated\n\n"); - } -} - -#---------------------------------------------------------------------------- -# "main()" -#---------------------------------------------------------------------------- -process_cmd_line(); -read_input_file(); -print_options(); -my $threshold_files = print_summary_and_fn_totals(); -annotate_ann_files($threshold_files); - -##--------------------------------------------------------------------## -##--- end vg_annotate.in ---## -##--------------------------------------------------------------------## - - diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c deleted file mode 100644 index 4f1bf10716..0000000000 --- a/cachegrind/cg_main.c +++ /dev/null @@ -1,1602 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- The cache simulation framework: instrumentation, recording ---*/ -/*--- and results printing. ---*/ -/*--- vg_cachesim.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2002 Nicholas Nethercote - njn25@cam.ac.uk - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - -#include "vg_cachesim_L2.c" -#include "vg_cachesim_I1.c" -#include "vg_cachesim_D1.c" - - -/* According to IA-32 Intel Architecture Software Developer's Manual: Vol 2 */ -#define MAX_x86_INSTR_SIZE 16 - -/* Size of various buffers used for storing strings */ -#define FILENAME_LEN 256 -#define FN_NAME_LEN 256 -#define BUF_LEN 512 -#define COMMIFY_BUF_LEN 128 -#define RESULTS_BUF_LEN 128 -#define LINE_BUF_LEN 64 - - -/*------------------------------------------------------------*/ -/*--- Generic utility stuff ---*/ -/*------------------------------------------------------------*/ - -Int VG_(log2) ( Int x ) -{ - Int i; - /* Any more than 32 and we overflow anyway... 
*/ - for (i = 0; i < 32; i++) { - if (1 << i == x) return i; - } - return -1; -} - - -/*------------------------------------------------------------*/ -/*--- Output file related stuff ---*/ -/*------------------------------------------------------------*/ - -#define OUT_FILE "cachegrind.out" - -static void file_err() -{ - VG_(message)(Vg_UserMsg, - "error: can't open cache simulation output file `%s'", - OUT_FILE ); - VG_(exit)(1); -} - -/*------------------------------------------------------------*/ -/*--- Cost center types, operations ---*/ -/*------------------------------------------------------------*/ - -typedef struct _CC CC; -struct _CC { - ULong a; - ULong m1; - ULong m2; -}; - -static __inline__ void initCC(CC* cc) { - cc->a = 0; - cc->m1 = 0; - cc->m2 = 0; -} - -typedef enum { INSTR_CC, READ_CC, WRITE_CC, MOD_CC } CC_type; - -/* Instruction-level cost-centres. The typedefs for these structs are in - * vg_include.c - * - * WARNING: the 'tag' field *must* be the first byte of both CC types. - * - * This is because we use it to work out what kind of CC we're dealing with. 
- */ -struct _iCC { - /* word 1 */ - UChar tag; - UChar instr_size; - /* 2 bytes padding */ - - /* words 2+ */ - Addr instr_addr; - CC I; -}; - -struct _idCC { - /* word 1 */ - UChar tag; - UChar instr_size; - UChar data_size; - /* 1 byte padding */ - - /* words 2+ */ - Addr instr_addr; - CC I; - CC D; -}; - -static void init_iCC(iCC* cc, Addr instr_addr, UInt instr_size) -{ - cc->tag = INSTR_CC; - cc->instr_size = instr_size; - cc->instr_addr = instr_addr; - initCC(&cc->I); -} - -static void init_idCC(CC_type X_CC, idCC* cc, Addr instr_addr, - UInt instr_size, UInt data_size) -{ - cc->tag = X_CC; - cc->instr_size = instr_size; - cc->data_size = data_size; - cc->instr_addr = instr_addr; - initCC(&cc->I); - initCC(&cc->D); -} - -#define ADD_CC_TO(CC_type, cc, total) \ - total.a += ((CC_type*)BBCC_ptr)->cc.a; \ - total.m1 += ((CC_type*)BBCC_ptr)->cc.m1; \ - total.m2 += ((CC_type*)BBCC_ptr)->cc.m2; - -/* If 1, address of each instruction is printed as a comment after its counts - * in cachegrind.out */ -#define PRINT_INSTR_ADDRS 0 - -static __inline__ void sprint_iCC(Char buf[BUF_LEN], iCC* cc) -{ -#if PRINT_INSTR_ADDRS - VG_(sprintf)(buf, "%llu %llu %llu # %x\n", - cc->I.a, cc->I.m1, cc->I.m2, cc->instr_addr); -#else - VG_(sprintf)(buf, "%llu %llu %llu\n", - cc->I.a, cc->I.m1, cc->I.m2); -#endif -} - -static __inline__ void sprint_read_or_mod_CC(Char buf[BUF_LEN], idCC* cc) -{ -#if PRINT_INSTR_ADDRS - VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu # %x\n", - cc->I.a, cc->I.m1, cc->I.m2, - cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr); -#else - VG_(sprintf)(buf, "%llu %llu %llu %llu %llu %llu\n", - cc->I.a, cc->I.m1, cc->I.m2, - cc->D.a, cc->D.m1, cc->D.m2); -#endif -} - -static __inline__ void sprint_write_CC(Char buf[BUF_LEN], idCC* cc) -{ -#if PRINT_INSTR_ADDRS - VG_(sprintf)(buf, "%llu %llu %llu . . . %llu %llu %llu # %x\n", - cc->I.a, cc->I.m1, cc->I.m2, - cc->D.a, cc->D.m1, cc->D.m2, cc->instr_addr); -#else - VG_(sprintf)(buf, "%llu %llu %llu . . . 
%llu %llu %llu\n", - cc->I.a, cc->I.m1, cc->I.m2, - cc->D.a, cc->D.m1, cc->D.m2); -#endif -} - -/*------------------------------------------------------------*/ -/*--- BBCC hash table stuff ---*/ -/*------------------------------------------------------------*/ - -/* The table of BBCCs is of the form hash(filename, hash(fn_name, - * hash(BBCCs))). Each hash table is separately chained. The sizes below work - * fairly well for Konqueror. */ - -#define N_FILE_ENTRIES 251 -#define N_FN_ENTRIES 53 -#define N_BBCC_ENTRIES 37 - -/* The cost centres for a basic block are stored in a contiguous array. - * They are distinguishable by their tag field. */ -typedef struct _BBCC BBCC; -struct _BBCC { - Addr orig_addr; - UInt array_size; /* byte-size of variable length array */ - BBCC* next; - Addr array[0]; /* variable length array */ -}; - -typedef struct _fn_node fn_node; -struct _fn_node { - Char* fn_name; - BBCC* BBCCs[N_BBCC_ENTRIES]; - fn_node* next; -}; - -typedef struct _file_node file_node; -struct _file_node { - Char* filename; - fn_node* fns[N_FN_ENTRIES]; - file_node* next; -}; - -/* BBCC_table structure: list(filename, list(fn_name, list(BBCC))) */ -static file_node *BBCC_table[N_FILE_ENTRIES]; - -static Int distinct_files = 0; -static Int distinct_fns = 0; - -static Int distinct_instrs = 0; -static Int full_debug_BBs = 0; -static Int file_line_debug_BBs = 0; -static Int fn_name_debug_BBs = 0; -static Int no_debug_BBs = 0; - -static Int BB_retranslations = 0; - -static CC Ir_discards; -static CC Dr_discards; -static CC Dw_discards; - -static void init_BBCC_table() -{ - Int i; - for (i = 0; i < N_FILE_ENTRIES; i++) - BBCC_table[i] = NULL; -} - -static void get_debug_info(Addr instr_addr, Char filename[FILENAME_LEN], - Char fn_name[FN_NAME_LEN], Int* line_num) -{ - Bool found1, found2, no_demangle = False; - - found1 = VG_(what_line_is_this)(instr_addr, filename, - FILENAME_LEN, line_num); - found2 = VG_(what_fn_is_this)(no_demangle, instr_addr, fn_name, 
FN_NAME_LEN); - - if (!found1 && !found2) { - no_debug_BBs++; - VG_(strcpy)(filename, "???"); - VG_(strcpy)(fn_name, "???"); - *line_num = 0; - - } else if ( found1 && found2) { - full_debug_BBs++; - - } else if ( found1 && !found2) { - file_line_debug_BBs++; - VG_(strcpy)(fn_name, "???"); - - } else /*(!found1 && found2)*/ { - fn_name_debug_BBs++; - VG_(strcpy)(filename, "???"); - *line_num = 0; - } -} - -/* Forward declaration. */ -static Int compute_BBCC_array_size(UCodeBlock* cb); - -static __inline__ -file_node* new_file_node(Char filename[FILENAME_LEN], file_node* next) -{ - Int i; - file_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(file_node)); - new->filename = VG_(strdup)(VG_AR_PRIVATE, filename); - for (i = 0; i < N_FN_ENTRIES; i++) { - new->fns[i] = NULL; - } - new->next = next; - return new; -} - -static __inline__ -fn_node* new_fn_node(Char fn_name[FILENAME_LEN], fn_node* next) -{ - Int i; - fn_node* new = VG_(malloc)(VG_AR_PRIVATE, sizeof(fn_node)); - new->fn_name = VG_(strdup)(VG_AR_PRIVATE, fn_name); - for (i = 0; i < N_BBCC_ENTRIES; i++) { - new->BBCCs[i] = NULL; - } - new->next = next; - return new; -} - -static __inline__ -BBCC* new_BBCC(Addr bb_orig_addr, UCodeBlock* cb, BBCC* next) -{ - Int BBCC_array_size = compute_BBCC_array_size(cb); - BBCC* new; - - new = (BBCC*)VG_(malloc)(VG_AR_PRIVATE, sizeof(BBCC) + BBCC_array_size); - new->orig_addr = bb_orig_addr; - new->array_size = BBCC_array_size; - new->next = next; - - return new; -} - -#define HASH_CONSTANT 256 - -static UInt hash(Char *s, UInt table_size) -{ - int hash_value = 0; - for ( ; *s; s++) - hash_value = (HASH_CONSTANT * hash_value + *s) % table_size; - return hash_value; -} - -/* Do a three step traversal: by filename, then fn_name, then instr_addr. - * In all cases prepends new nodes to their chain. Returns a pointer to the - * cost centre. Also sets BB_seen_before by reference. 
- */ -static __inline__ BBCC* get_BBCC(Addr bb_orig_addr, UCodeBlock* cb, - Bool remove, Bool *BB_seen_before) -{ - file_node *curr_file_node; - fn_node *curr_fn_node; - BBCC **prev_BBCC_next_ptr, *curr_BBCC; - Char filename[FILENAME_LEN], fn_name[FN_NAME_LEN]; - UInt filename_hash, fnname_hash, BBCC_hash; - Int dummy_line_num; - - get_debug_info(bb_orig_addr, filename, fn_name, &dummy_line_num); - - VGP_PUSHCC(VgpCacheGetBBCC); - filename_hash = hash(filename, N_FILE_ENTRIES); - curr_file_node = BBCC_table[filename_hash]; - while (NULL != curr_file_node && - VG_(strcmp)(filename, curr_file_node->filename) != 0) { - curr_file_node = curr_file_node->next; - } - if (NULL == curr_file_node) { - BBCC_table[filename_hash] = curr_file_node = - new_file_node(filename, BBCC_table[filename_hash]); - distinct_files++; - } - - fnname_hash = hash(fn_name, N_FN_ENTRIES); - curr_fn_node = curr_file_node->fns[fnname_hash]; - while (NULL != curr_fn_node && - VG_(strcmp)(fn_name, curr_fn_node->fn_name) != 0) { - curr_fn_node = curr_fn_node->next; - } - if (NULL == curr_fn_node) { - curr_file_node->fns[fnname_hash] = curr_fn_node = - new_fn_node(fn_name, curr_file_node->fns[fnname_hash]); - distinct_fns++; - } - - BBCC_hash = bb_orig_addr % N_BBCC_ENTRIES; - prev_BBCC_next_ptr = &(curr_fn_node->BBCCs[BBCC_hash]); - curr_BBCC = curr_fn_node->BBCCs[BBCC_hash]; - while (NULL != curr_BBCC && bb_orig_addr != curr_BBCC->orig_addr) { - prev_BBCC_next_ptr = &(curr_BBCC->next); - curr_BBCC = curr_BBCC->next; - } - if (curr_BBCC == NULL) { - - vg_assert(False == remove); - - curr_fn_node->BBCCs[BBCC_hash] = curr_BBCC = - new_BBCC(bb_orig_addr, cb, curr_fn_node->BBCCs[BBCC_hash]); - *BB_seen_before = False; - - } else { - vg_assert(bb_orig_addr == curr_BBCC->orig_addr); - vg_assert(curr_BBCC->array_size > 0 && curr_BBCC->array_size < 1000000); - if (VG_(clo_verbosity) > 2) { - VG_(message)(Vg_DebugMsg, - "BB retranslation, retrieving from BBCC table"); - } - *BB_seen_before = True; - - if 
(True == remove) { - // Remove curr_BBCC from chain; it will be used and free'd by the - // caller. - *prev_BBCC_next_ptr = curr_BBCC->next; - - } else { - BB_retranslations++; - } - } - VGP_POPCC; - return curr_BBCC; -} - -/*------------------------------------------------------------*/ -/*--- Cache simulation instrumentation phase ---*/ -/*------------------------------------------------------------*/ - -#define uInstr1 VG_(newUInstr1) -#define uInstr2 VG_(newUInstr2) -#define uInstr3 VG_(newUInstr3) -#define dis VG_(disassemble) -#define uLiteral VG_(setLiteralField) -#define newTemp VG_(getNewTemp) - -static Int compute_BBCC_array_size(UCodeBlock* cb) -{ - UInstr* u_in; - Int i, CC_size, BBCC_size = 0; - Bool is_LOAD, is_STORE, is_FPU_R, is_FPU_W; - - is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False; - - for (i = 0; i < cb->used; i++) { - /* VG_(ppUInstr)(0, &cb->instrs[i]); */ - - u_in = &cb->instrs[i]; - switch(u_in->opcode) { - - case INCEIP: - goto case_for_end_of_instr; - - case JMP: - if (u_in->cond != CondAlways) break; - - goto case_for_end_of_instr; - - case_for_end_of_instr: - - CC_size = (is_LOAD || is_STORE || is_FPU_R || is_FPU_W - ? sizeof(idCC) : sizeof(iCC)); - - BBCC_size += CC_size; - is_LOAD = is_STORE = is_FPU_R = is_FPU_W = False; - break; - - case LOAD: - /* Two LDBs are possible for a single instruction */ - /* Also, a STORE can come after a LOAD for bts/btr/btc */ - vg_assert(/*!is_LOAD &&*/ /* !is_STORE && */ - !is_FPU_R && !is_FPU_W); - is_LOAD = True; - break; - - case STORE: - /* Multiple STOREs are possible for 'pushal' */ - vg_assert( /*!is_STORE &&*/ !is_FPU_R && !is_FPU_W); - is_STORE = True; - break; - - case FPU_R: - vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W); - is_FPU_R = True; - break; - - case FPU_W: - vg_assert(!is_LOAD && !is_STORE && !is_FPU_R && !is_FPU_W); - is_FPU_W = True; - break; - - default: - break; - } - } - - return BBCC_size; -} - -/* Use this rather than eg. -1 because it's stored as a UInt. 
*/ -#define INVALID_DATA_SIZE 999999 - -UCodeBlock* VG_(cachesim_instrument)(UCodeBlock* cb_in, Addr orig_addr) -{ - UCodeBlock* cb; - Int i; - UInstr* u_in; - BBCC* BBCC_node; - Int t_CC_addr, t_read_addr, t_write_addr, t_data_addr; - Int CC_size = -1; /* Shut gcc warnings up */ - Addr instr_addr = orig_addr; - UInt instr_size, data_size = INVALID_DATA_SIZE; - Int helper = -1; /* Shut gcc warnings up */ - UInt stack_used; - Bool BB_seen_before = False; - Bool prev_instr_was_Jcond = False; - Addr BBCC_ptr0, BBCC_ptr; - - /* Get BBCC (creating if necessary -- requires a counting pass over the BB - * if it's the first time it's been seen), and point to start of the - * BBCC array. */ - BBCC_node = get_BBCC(orig_addr, cb_in, False, &BB_seen_before); - BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array); - - cb = VG_(allocCodeBlock)(); - cb->nextTemp = cb_in->nextTemp; - - t_CC_addr = t_read_addr = t_write_addr = t_data_addr = INVALID_TEMPREG; - - for (i = 0; i < cb_in->used; i++) { - u_in = &cb_in->instrs[i]; - - //VG_(ppUInstr)(0, u_in); - - /* What this is all about: we want to instrument each x86 instruction - * translation. The end of these are marked in three ways. The three - * ways, and the way we instrument them, are as follows: - * - * 1. UCode, INCEIP --> UCode, Instrumentation, INCEIP - * 2. UCode, Juncond --> UCode, Instrumentation, Juncond - * 3. UCode, Jcond, Juncond --> UCode, Instrumentation, Jcond, Juncond - * - * We must put the instrumentation before the jumps so that it is always - * executed. We don't have to put the instrumentation before the INCEIP - * (it could go after) but we do so for consistency. - * - * Junconds are always the last instruction in a basic block. Jconds are - * always the 2nd last, and must be followed by a Jcond. We check this - * with various assertions. - * - * Note that in VG_(disBB) we patched the `extra4b' field of the first - * occurring JMP in a block with the size of its x86 instruction. This - * is used now. 
- * - * Note that we don't have to treat JIFZ specially; unlike JMPs, JIFZ - * occurs in the middle of a BB and gets an INCEIP after it. - * - * The instrumentation is just a call to the appropriate helper function, - * passing it the address of the instruction's CC. - */ - if (prev_instr_was_Jcond) vg_assert(u_in->opcode == JMP); - - switch (u_in->opcode) { - - case INCEIP: - instr_size = u_in->val1; - goto case_for_end_of_x86_instr; - - case JMP: - if (u_in->cond == CondAlways) { - vg_assert(i+1 == cb_in->used); - - /* Don't instrument if previous instr was a Jcond. */ - if (prev_instr_was_Jcond) { - vg_assert(0 == u_in->extra4b); - VG_(copyUInstr)(cb, u_in); - break; - } - prev_instr_was_Jcond = False; - - } else { - vg_assert(i+2 == cb_in->used); /* 2nd last instr in block */ - prev_instr_was_Jcond = True; - } - - /* Ah, the first JMP... instrument, please. */ - instr_size = u_in->extra4b; - goto case_for_end_of_x86_instr; - - /* Shared code that is executed at the end of an x86 translation - * block, marked by either an INCEIP or an unconditional JMP. */ - case_for_end_of_x86_instr: - -#define IS_(X) (INVALID_TEMPREG != t_##X##_addr) - - /* Initialise the CC in the BBCC array appropriately if it hasn't - * been initialised before. - * Then call appropriate sim function, passing it the CC address. - * Note that CALLM_S/CALL_E aren't required here; by this point, - * the checking related to them has already happened. 
*/ - stack_used = 0; - - vg_assert(instr_size >= 1 && instr_size <= MAX_x86_INSTR_SIZE); - vg_assert(0 != instr_addr); - - /* Save the caller-save registers before we push our args */ - uInstr1(cb, PUSH, 4, RealReg, R_EAX); - uInstr1(cb, PUSH, 4, RealReg, R_ECX); - uInstr1(cb, PUSH, 4, RealReg, R_EDX); - - if (!IS_(read) && !IS_(write)) { - iCC* CC_ptr = (iCC*)(BBCC_ptr); - vg_assert(INVALID_DATA_SIZE == data_size); - vg_assert(INVALID_TEMPREG == t_read_addr && - INVALID_TEMPREG == t_write_addr); - CC_size = sizeof(iCC); - if (!BB_seen_before) - init_iCC(CC_ptr, instr_addr, instr_size); - - helper = VGOFF_(cachesim_log_non_mem_instr); - - } else { - CC_type X_CC; - idCC* CC_ptr = (idCC*)(BBCC_ptr); - - vg_assert(4 == data_size || 2 == data_size || 1 == data_size || - 8 == data_size || 10 == data_size); - - CC_size = sizeof(idCC); - helper = VGOFF_(cachesim_log_mem_instr); - - if (IS_(read) && !IS_(write)) { - X_CC = READ_CC; - vg_assert(INVALID_TEMPREG != t_read_addr && - INVALID_TEMPREG == t_write_addr); - t_data_addr = t_read_addr; - - } else if (!IS_(read) && IS_(write)) { - X_CC = WRITE_CC; - vg_assert(INVALID_TEMPREG == t_read_addr && - INVALID_TEMPREG != t_write_addr); - t_data_addr = t_write_addr; - - } else { - vg_assert(IS_(read) && IS_(write)); - X_CC = MOD_CC; - vg_assert(INVALID_TEMPREG != t_read_addr && - INVALID_TEMPREG != t_write_addr); - t_data_addr = t_read_addr; - } - - if (!BB_seen_before) - init_idCC(X_CC, CC_ptr, instr_addr, instr_size, data_size); - - /* 2nd arg: data addr */ - uInstr1(cb, PUSH, 4, TempReg, t_data_addr); - stack_used += 4; - } -#undef IS_ - - /* 1st arg: CC addr */ - t_CC_addr = newTemp(cb); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_CC_addr); - uLiteral(cb, BBCC_ptr); - uInstr1(cb, PUSH, 4, TempReg, t_CC_addr); - stack_used += 4; - - /* Call function and return. 
*/ - uInstr1(cb, CALLM, 0, Lit16, helper); - uInstr1(cb, CLEAR, 0, Lit16, stack_used); - - /* Restore the caller-save registers now the call is done */ - uInstr1(cb, POP, 4, RealReg, R_EDX); - uInstr1(cb, POP, 4, RealReg, R_ECX); - uInstr1(cb, POP, 4, RealReg, R_EAX); - - VG_(copyUInstr)(cb, u_in); - - /* Update BBCC_ptr, EIP, de-init read/write temps for next instr */ - BBCC_ptr += CC_size; - instr_addr += instr_size; - t_CC_addr = t_read_addr = t_write_addr = - t_data_addr = INVALID_TEMPREG; - data_size = INVALID_DATA_SIZE; - break; - - - /* For memory-ref instrs, copy the data_addr into a temporary to be - * passed to the cachesim_log_function at the end of the instruction. - */ - case LOAD: - t_read_addr = newTemp(cb); - uInstr2(cb, MOV, 4, TempReg, u_in->val1, TempReg, t_read_addr); - data_size = u_in->size; - VG_(copyUInstr)(cb, u_in); - break; - - case FPU_R: - t_read_addr = newTemp(cb); - uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_read_addr); - data_size = u_in->size; - VG_(copyUInstr)(cb, u_in); - break; - - /* Note that we must set t_write_addr even for mod instructions; - * that's how the code above determines whether it does a write; - * without it, it would think a mod instruction is a read. - * As for the MOV, if it's a mod instruction it's redundant, but it's - * not expensive and mod instructions are rare anyway. 
*/ - case STORE: - case FPU_W: - t_write_addr = newTemp(cb); - uInstr2(cb, MOV, 4, TempReg, u_in->val2, TempReg, t_write_addr); - data_size = u_in->size; - VG_(copyUInstr)(cb, u_in); - break; - - case NOP: case CALLM_E: case CALLM_S: - break; - - default: - VG_(copyUInstr)(cb, u_in); - break; - } - } - - /* Just check everything looks ok */ - vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size); - - VG_(freeCodeBlock)(cb_in); - return cb; -} - -/*------------------------------------------------------------*/ -/*--- Cache simulation stuff ---*/ -/*------------------------------------------------------------*/ - -#define MIN_LINE_SIZE 16 - -/* Total reads/writes/misses. Calculated during CC traversal at the end. */ -static CC Ir_total; -static CC Dr_total; -static CC Dw_total; - -/* All CPUID info taken from sandpile.org/a32/cpuid.htm */ -/* Probably only works for Intel and AMD chips, and probably only for some of - * them. - */ - -static __inline__ void cpuid(Int n, Int *a, Int *b, Int *c, Int *d) -{ - __asm__ __volatile__ ( - "cpuid" - : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d) /* output */ - : "0" (n) /* input */ - ); -} - -static void micro_ops_warn(Int actual_size, Int used_size, Int line_size) -{ - VG_(message)(Vg_DebugMsg, - "warning: Pentium with %d K micro_op instruction trace cache", - actual_size); - VG_(message)(Vg_DebugMsg, - " Simulating a %d KB cache with %d B lines", - used_size, line_size); -} - -/* Intel method is truly wretched. We have to do an insane indexing into an - * array of pre-defined configurations for various parts of the memory - * hierarchy. 
- */ -static -Int Intel_cache_info(Int level, cache_t* I1c, cache_t* D1c, cache_t* L2c) -{ - UChar info[16]; - Int i, trials; - - if (level < 2) { - VG_(message)(Vg_DebugMsg, - "warning: CPUID level < 2 for Intel processor (%d)", - level); - return -1; - } - - cpuid(2, (Int*)&info[0], (Int*)&info[4], - (Int*)&info[8], (Int*)&info[12]); - trials = info[0] - 1; /* AL register - bits 0..7 of %eax */ - info[0] = 0x0; /* reset AL */ - - if (0 != trials) { - VG_(message)(Vg_DebugMsg, - "warning: non-zero CPUID trials for Intel processor (%d)", - trials); - return -1; - } - - for (i = 0; i < 16; i++) { - - switch (info[i]) { - - case 0x0: /* ignore zeros */ - break; - - case 0x01: case 0x02: case 0x03: case 0x04: /* TLB info, ignore */ - case 0x90: case 0x96: case 0x9b: - break; - - case 0x06: *I1c = (cache_t) { 8, 4, 32 }; break; - case 0x08: *I1c = (cache_t) { 16, 4, 32 }; break; - - case 0x0a: *D1c = (cache_t) { 8, 2, 32 }; break; - case 0x0c: *D1c = (cache_t) { 16, 4, 32 }; break; - - case 0x22: case 0x23: case 0x25: case 0x29: - case 0x88: case 0x89: case 0x8a: - VG_(message)(Vg_DebugMsg, - "warning: L3 cache detected but ignored\n"); - break; - - case 0x40: - VG_(message)(Vg_DebugMsg, - "warning: L2 cache not installed, ignore L2 results."); - break; - - case 0x41: *L2c = (cache_t) { 128, 4, 32 }; break; - case 0x42: *L2c = (cache_t) { 256, 4, 32 }; break; - case 0x43: *L2c = (cache_t) { 512, 4, 32 }; break; - case 0x44: *L2c = (cache_t) { 1024, 4, 32 }; break; - case 0x45: *L2c = (cache_t) { 2048, 4, 32 }; break; - - /* These are sectored, whatever that means */ - case 0x66: *D1c = (cache_t) { 8, 4, 64 }; break; /* sectored */ - case 0x67: *D1c = (cache_t) { 16, 4, 64 }; break; /* sectored */ - case 0x68: *D1c = (cache_t) { 32, 4, 64 }; break; /* sectored */ - - /* HACK ALERT: Instruction trace cache -- capacity is micro-ops based. 
- * conversion to byte size is a total guess; treat the 12K and 16K - * cases the same since the cache byte size must be a power of two for - * everything to work!. Also guessing 32 bytes for the line size... - */ - case 0x70: /* 12K micro-ops, 8-way */ - *I1c = (cache_t) { 16, 8, 32 }; - micro_ops_warn(12, 16, 32); - break; - case 0x71: /* 16K micro-ops, 8-way */ - *I1c = (cache_t) { 16, 8, 32 }; - micro_ops_warn(16, 16, 32); - break; - case 0x72: /* 32K micro-ops, 8-way */ - *I1c = (cache_t) { 32, 8, 32 }; - micro_ops_warn(32, 32, 32); - break; - - case 0x79: *L2c = (cache_t) { 128, 8, 64 }; break; /* sectored */ - case 0x7a: *L2c = (cache_t) { 256, 8, 64 }; break; /* sectored */ - case 0x7b: *L2c = (cache_t) { 512, 8, 64 }; break; /* sectored */ - case 0x7c: *L2c = (cache_t) { 1024, 8, 64 }; break; /* sectored */ - - case 0x81: *L2c = (cache_t) { 128, 8, 32 }; break; - case 0x82: *L2c = (cache_t) { 256, 8, 32 }; break; - case 0x83: *L2c = (cache_t) { 512, 8, 32 }; break; - case 0x84: *L2c = (cache_t) { 1024, 8, 32 }; break; - case 0x85: *L2c = (cache_t) { 2048, 8, 32 }; break; - - default: - VG_(message)(Vg_DebugMsg, - "warning: Unknown Intel cache config value " - "(0x%x), ignoring\n", info[i]); - break; - } - } - return 0; -} - -/* AMD method is straightforward, just extract appropriate bits from the - * result registers. - * - * Bits, for D1 and I1: - * 31..24 data L1 cache size in KBs - * 23..16 data L1 cache associativity (FFh=full) - * 15.. 8 data L1 cache lines per tag - * 7.. 0 data L1 cache line size in bytes - * - * Bits, for L2: - * 31..16 unified L2 cache size in KBs - * 15..12 unified L2 cache associativity (0=off, FFh=full) - * 11.. 8 unified L2 cache lines per tag - * 7.. 0 unified L2 cache line size in bytes - * - * #3 The AMD K7 processor's L2 cache must be configured prior to relying - * upon this information. (Whatever that means -- njn) - * - * Returns 0 on success, non-zero on failure. 
- */ -static -Int AMD_cache_info(cache_t* I1c, cache_t* D1c, cache_t* L2c) -{ - Int dummy, ext_level; - Int I1i, D1i, L2i; - - cpuid(0x80000000, &ext_level, &dummy, &dummy, &dummy); - - if (0 == (ext_level & 0x80000000) || ext_level < 0x80000006) { - VG_(message)(Vg_UserMsg, - "warning: ext_level < 0x80000006 for AMD processor (0x%x)", - ext_level); - return -1; - } - - cpuid(0x80000005, &dummy, &dummy, &D1i, &I1i); - cpuid(0x80000006, &dummy, &dummy, &L2i, &dummy); - - D1c->size = (D1i >> 24) & 0xff; - D1c->assoc = (D1i >> 16) & 0xff; - D1c->line_size = (D1i >> 0) & 0xff; - - I1c->size = (I1i >> 24) & 0xff; - I1c->assoc = (I1i >> 16) & 0xff; - I1c->line_size = (I1i >> 0) & 0xff; - - L2c->size = (L2i >> 16) & 0xffff; /* Nb: different bits used for L2 */ - L2c->assoc = (L2i >> 12) & 0xf; - L2c->line_size = (L2i >> 0) & 0xff; - - return 0; -} - -static jmp_buf cpuid_jmpbuf; - -static -void cpuid_SIGILL_handler(int signum) -{ - __builtin_longjmp(cpuid_jmpbuf, 1); -} - -static -Int get_caches_from_CPUID(cache_t* I1c, cache_t* D1c, cache_t* L2c) -{ - Int level, res, ret; - Char vendor_id[13]; - vki_ksigaction sigill_new, sigill_saved; - - /* Install own SIGILL handler */ - sigill_new.ksa_handler = cpuid_SIGILL_handler; - sigill_new.ksa_flags = 0; - sigill_new.ksa_restorer = NULL; - res = VG_(ksigemptyset)( &sigill_new.ksa_mask ); - vg_assert(res == 0); - - res = VG_(ksigaction)( VKI_SIGILL, &sigill_new, &sigill_saved ); - vg_assert(res == 0); - - /* Trap for illegal instruction, in case it's a really old processor that - * doesn't support CPUID. 
*/ - if (__builtin_setjmp(cpuid_jmpbuf) == 0) { - cpuid(0, &level, (int*)&vendor_id[0], - (int*)&vendor_id[8], (int*)&vendor_id[4]); - vendor_id[12] = '\0'; - - /* Restore old SIGILL handler */ - res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL ); - vg_assert(res == 0); - - } else { - VG_(message)(Vg_DebugMsg, "CPUID instruction not supported"); - - /* Restore old SIGILL handler */ - res = VG_(ksigaction)( VKI_SIGILL, &sigill_saved, NULL ); - vg_assert(res == 0); - return -1; - } - - if (0 == level) { - VG_(message)(Vg_DebugMsg, "CPUID level is 0, early Pentium?\n"); - return -1; - } - - /* Only handling Intel and AMD chips... no Cyrix, Transmeta, etc */ - if (0 == VG_(strcmp)(vendor_id, "GenuineIntel")) { - ret = Intel_cache_info(level, I1c, D1c, L2c); - - } else if (0 == VG_(strcmp)(vendor_id, "AuthenticAMD")) { - ret = AMD_cache_info(I1c, D1c, L2c); - - } else { - VG_(message)(Vg_DebugMsg, "CPU vendor ID not recognised (%s)", - vendor_id); - return -1; - } - - /* Successful! Convert sizes from KB to bytes */ - I1c->size *= 1024; - D1c->size *= 1024; - L2c->size *= 1024; - - return ret; -} - -/* Checks cache config is ok; makes it so if not. 
*/ -static -void check_cache(cache_t* cache, cache_t* dflt, Char *name) -{ - /* First check they're all powers of two */ - if (-1 == VG_(log2)(cache->size)) { - VG_(message)(Vg_UserMsg, - "warning: %s size of %dB not a power of two; " - "defaulting to %dB", name, cache->size, dflt->size); - cache->size = dflt->size; - } - - if (-1 == VG_(log2)(cache->assoc)) { - VG_(message)(Vg_UserMsg, - "warning: %s associativity of %d not a power of two; " - "defaulting to %d-way", name, cache->assoc, dflt->assoc); - cache->assoc = dflt->assoc; - } - - if (-1 == VG_(log2)(cache->line_size)) { - VG_(message)(Vg_UserMsg, - "warning: %s line size of %dB not a power of two; " - "defaulting to %dB", - name, cache->line_size, dflt->line_size); - cache->line_size = dflt->line_size; - } - - /* Then check line size >= 16 -- any smaller and a single instruction could - * straddle three cache lines, which breaks a simulation assertion and is - * stupid anyway. */ - if (cache->line_size < MIN_LINE_SIZE) { - VG_(message)(Vg_UserMsg, - "warning: %s line size of %dB too small; " - "increasing to %dB", name, cache->line_size, MIN_LINE_SIZE); - cache->line_size = MIN_LINE_SIZE; - } - - /* Then check cache size > line size (causes seg faults if not). */ - if (cache->size <= cache->line_size) { - VG_(message)(Vg_UserMsg, - "warning: %s cache size of %dB <= line size of %dB; " - "increasing to %dB", name, cache->size, cache->line_size, - cache->line_size * 2); - cache->size = cache->line_size * 2; - } - - /* Then check assoc <= (size / line size) (seg faults otherwise). */ - if (cache->assoc > (cache->size / cache->line_size)) { - VG_(message)(Vg_UserMsg, - "warning: %s associativity > (size / line size); " - "increasing size to %dB", - name, cache->assoc * cache->line_size); - cache->size = cache->assoc * cache->line_size; - } -} - -/* On entry, args are undefined. 
Fill them with any info from the - * command-line, then fill in any remaining with CPUID instruction if possible, - * otherwise use defaults. Then check them and fix if not ok. */ -static -void get_caches(cache_t* I1c, cache_t* D1c, cache_t* L2c) -{ - /* Defaults are for a model 3 or 4 Athlon */ - cache_t I1_dflt = (cache_t) { 65536, 2, 64 }; - cache_t D1_dflt = (cache_t) { 65536, 2, 64 }; - cache_t L2_dflt = (cache_t) { 262144, 8, 64 }; - -#define CMD_LINE_DEFINED(L) \ - (-1 != VG_(clo_##L##_cache).size || \ - -1 != VG_(clo_##L##_cache).assoc || \ - -1 != VG_(clo_##L##_cache).line_size) - - *I1c = VG_(clo_I1_cache); - *D1c = VG_(clo_D1_cache); - *L2c = VG_(clo_L2_cache); - - /* If any undefined on command-line, try CPUID */ - if (! CMD_LINE_DEFINED(I1) || - ! CMD_LINE_DEFINED(D1) || - ! CMD_LINE_DEFINED(L2)) { - - /* Overwrite CPUID result for any cache defined on command-line */ - if (0 == get_caches_from_CPUID(I1c, D1c, L2c)) { - - if (CMD_LINE_DEFINED(I1)) *I1c = VG_(clo_I1_cache); - if (CMD_LINE_DEFINED(D1)) *D1c = VG_(clo_D1_cache); - if (CMD_LINE_DEFINED(L2)) *L2c = VG_(clo_L2_cache); - - /* CPUID failed, use defaults for each undefined by command-line */ - } else { - VG_(message)(Vg_DebugMsg, - "Couldn't detect cache configuration, using one " - "or more defaults "); - - *I1c = (CMD_LINE_DEFINED(I1) ? VG_(clo_I1_cache) : I1_dflt); - *D1c = (CMD_LINE_DEFINED(D1) ? VG_(clo_D1_cache) : D1_dflt); - *L2c = (CMD_LINE_DEFINED(L2) ? 
VG_(clo_L2_cache) : L2_dflt); - } - } -#undef CMD_LINE_DEFINED - - check_cache(I1c, &I1_dflt, "I1"); - check_cache(D1c, &D1_dflt, "D1"); - check_cache(L2c, &L2_dflt, "L2"); - - if (VG_(clo_verbosity) > 1) { - VG_(message)(Vg_UserMsg, "Cache configuration used:"); - VG_(message)(Vg_UserMsg, " I1: %dB, %d-way, %dB lines", - I1c->size, I1c->assoc, I1c->line_size); - VG_(message)(Vg_UserMsg, " D1: %dB, %d-way, %dB lines", - D1c->size, D1c->assoc, D1c->line_size); - VG_(message)(Vg_UserMsg, " L2: %dB, %d-way, %dB lines", - L2c->size, L2c->assoc, L2c->line_size); - } -} - -void VG_(init_cachesim)(void) -{ - cache_t I1c, D1c, L2c; - - /* Make sure the output file can be written. */ - Int fd = VG_(open_write)(OUT_FILE); - if (-1 == fd) { - fd = VG_(create_and_write)(OUT_FILE); - if (-1 == fd) { - file_err(); - } - } - VG_(close)(fd); - - initCC(&Ir_total); - initCC(&Dr_total); - initCC(&Dw_total); - - initCC(&Ir_discards); - initCC(&Dr_discards); - initCC(&Dw_discards); - - get_caches(&I1c, &D1c, &L2c); - - cachesim_I1_initcache(I1c); - //cachesim_I1_initcache(); - cachesim_D1_initcache(D1c); - //cachesim_D1_initcache(); - cachesim_L2_initcache(L2c); - //cachesim_L2_initcache(); - - init_BBCC_table(); -} - -void VG_(cachesim_log_non_mem_instr)(iCC* cc) -{ - //VG_(printf)("sim I: CCaddr=0x%x, iaddr=0x%x, isize=%u\n", - // cc, cc->instr_addr, cc->instr_size) - VGP_PUSHCC(VgpCacheSimulate); - cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2); - cc->I.a++; - VGP_POPCC; -} - -void VG_(cachesim_log_mem_instr)(idCC* cc, Addr data_addr) -{ - //VG_(printf)("sim D: CCaddr=0x%x, iaddr=0x%x, isize=%u, daddr=0x%x, dsize=%u\n", - // cc, cc->instr_addr, cc->instr_size, data_addr, cc->data_size) - VGP_PUSHCC(VgpCacheSimulate); - cachesim_I1_doref(cc->instr_addr, cc->instr_size, &cc->I.m1, &cc->I.m2); - cc->I.a++; - - cachesim_D1_doref(data_addr, cc->data_size, &cc->D.m1, &cc->D.m2); - cc->D.a++; - VGP_POPCC; -} - 
-/*------------------------------------------------------------*/ -/*--- Printing of output file and summary stats ---*/ -/*------------------------------------------------------------*/ - -static void fprint_BBCC(Int fd, BBCC* BBCC_node, Char *first_instr_fl, - Char *first_instr_fn) -{ - Addr BBCC_ptr0, BBCC_ptr; - Char buf[BUF_LEN], curr_file[BUF_LEN], - fbuf[BUF_LEN+4], lbuf[LINE_BUF_LEN]; - UInt line_num; - - BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array); - - /* Mark start of basic block in output, just to ease debugging */ - VG_(write)(fd, (void*)"\n", 1); - - VG_(strcpy)(curr_file, first_instr_fl); - - while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) { - - /* We pretend the CC is an iCC for getting the tag. This is ok - * because both CC types have tag as their first byte. Once we know - * the type, we can cast and act appropriately. */ - - Char fl_buf[FILENAME_LEN]; - Char fn_buf[FN_NAME_LEN]; - - Addr instr_addr; - switch ( ((iCC*)BBCC_ptr)->tag ) { - - case INSTR_CC: - instr_addr = ((iCC*)BBCC_ptr)->instr_addr; - sprint_iCC(buf, (iCC*)BBCC_ptr); - ADD_CC_TO(iCC, I, Ir_total); - BBCC_ptr += sizeof(iCC); - break; - - case READ_CC: - case MOD_CC: - instr_addr = ((idCC*)BBCC_ptr)->instr_addr; - sprint_read_or_mod_CC(buf, (idCC*)BBCC_ptr); - ADD_CC_TO(idCC, I, Ir_total); - ADD_CC_TO(idCC, D, Dr_total); - BBCC_ptr += sizeof(idCC); - break; - - case WRITE_CC: - instr_addr = ((idCC*)BBCC_ptr)->instr_addr; - sprint_write_CC(buf, (idCC*)BBCC_ptr); - ADD_CC_TO(idCC, I, Ir_total); - ADD_CC_TO(idCC, D, Dw_total); - BBCC_ptr += sizeof(idCC); - break; - - default: - VG_(panic)("Unknown CC type in fprint_BBCC()\n"); - break; - } - distinct_instrs++; - - get_debug_info(instr_addr, fl_buf, fn_buf, &line_num); - - /* Allow for filename switching in the middle of a BB; if this happens, - * must print the new filename with the function name. 
*/ - if (0 != VG_(strcmp)(fl_buf, curr_file)) { - VG_(strcpy)(curr_file, fl_buf); - VG_(sprintf)(fbuf, "fi=%s\n", curr_file); - VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf)); - } - - /* If the function name for this instruction doesn't match that of the - * first instruction in the BB, print warning. */ - if (VG_(clo_trace_symtab) && 0 != VG_(strcmp)(fn_buf, first_instr_fn)) { - VG_(printf)("Mismatched function names\n"); - VG_(printf)(" filenames: BB:%s, instr:%s;" - " fn_names: BB:%s, instr:%s;" - " line: %d\n", - first_instr_fl, fl_buf, - first_instr_fn, fn_buf, - line_num); - } - - VG_(sprintf)(lbuf, "%u ", line_num); - VG_(write)(fd, (void*)lbuf, VG_(strlen)(lbuf)); /* line number */ - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); /* cost centre */ - } - /* If we switched filenames in the middle of the BB without switching back, - * switch back now because the subsequent BB may be relying on falling under - * the original file name. */ - if (0 != VG_(strcmp)(first_instr_fl, curr_file)) { - VG_(sprintf)(fbuf, "fe=%s\n", first_instr_fl); - VG_(write)(fd, (void*)fbuf, VG_(strlen)(fbuf)); - } - - /* Mark end of basic block */ - /* VG_(write)(fd, (void*)"#}\n", 3); */ - - vg_assert(BBCC_ptr - BBCC_ptr0 == BBCC_node->array_size); -} - -static void fprint_BBCC_table_and_calc_totals(Int client_argc, - Char** client_argv) -{ - Int fd; - Char buf[BUF_LEN]; - file_node *curr_file_node; - fn_node *curr_fn_node; - BBCC *curr_BBCC; - Int i,j,k; - - VGP_PUSHCC(VgpCacheDump); - fd = VG_(open_write)(OUT_FILE); - if (-1 == fd) { file_err(); } - - /* "desc:" lines (giving I1/D1/L2 cache configuration) */ - VG_(sprintf)(buf, "desc: I1 cache: %s\n", I1.desc_line); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - VG_(sprintf)(buf, "desc: D1 cache: %s\n", D1.desc_line); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - VG_(sprintf)(buf, "desc: L2 cache: %s\n", L2.desc_line); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - /* "cmd:" line */ - VG_(strcpy)(buf, "cmd:"); - 
VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - for (i = 0; i < client_argc; i++) { - VG_(sprintf)(buf, " %s", client_argv[i]); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - } - /* "events:" line */ - VG_(sprintf)(buf, "\nevents: Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw\n"); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - /* Six loops here: three for the hash table arrays, and three for the - * chains hanging off the hash table arrays. */ - for (i = 0; i < N_FILE_ENTRIES; i++) { - curr_file_node = BBCC_table[i]; - while (curr_file_node != NULL) { - VG_(sprintf)(buf, "fl=%s\n", curr_file_node->filename); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - for (j = 0; j < N_FN_ENTRIES; j++) { - curr_fn_node = curr_file_node->fns[j]; - while (curr_fn_node != NULL) { - VG_(sprintf)(buf, "fn=%s\n", curr_fn_node->fn_name); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - for (k = 0; k < N_BBCC_ENTRIES; k++) { - curr_BBCC = curr_fn_node->BBCCs[k]; - while (curr_BBCC != NULL) { - fprint_BBCC(fd, curr_BBCC, - - curr_file_node->filename, - curr_fn_node->fn_name); - - curr_BBCC = curr_BBCC->next; - } - } - curr_fn_node = curr_fn_node->next; - } - } - curr_file_node = curr_file_node->next; - } - } - - /* Print stats from any discarded basic blocks */ - if (0 != Ir_discards.a) { - - VG_(sprintf)(buf, "fl=(discarded)\n"); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - VG_(sprintf)(buf, "fn=(discarded)\n"); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - /* Use 0 as line number */ - VG_(sprintf)(buf, "0 %llu %llu %llu %llu %llu %llu %llu %llu %llu\n", - Ir_discards.a, Ir_discards.m1, Ir_discards.m2, - Dr_discards.a, Dr_discards.m1, Dr_discards.m2, - Dw_discards.a, Dw_discards.m1, Dw_discards.m2); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - - Ir_total.a += Ir_discards.a; - Ir_total.m1 += Ir_discards.m1; - Ir_total.m2 += Ir_discards.m2; - Dr_total.a += Dr_discards.a; - Dr_total.m1 += Dr_discards.m1; - Dr_total.m2 += Dr_discards.m2; - Dw_total.a += 
Dw_discards.a; - Dw_total.m1 += Dw_discards.m1; - Dw_total.m2 += Dw_discards.m2; - } - - /* Summary stats must come after rest of table, since we calculate them - * during traversal. */ - VG_(sprintf)(buf, "summary: " - "%llu %llu %llu " - "%llu %llu %llu " - "%llu %llu %llu\n", - Ir_total.a, Ir_total.m1, Ir_total.m2, - Dr_total.a, Dr_total.m1, Dr_total.m2, - Dw_total.a, Dw_total.m1, Dw_total.m2); - VG_(write)(fd, (void*)buf, VG_(strlen)(buf)); - VG_(close)(fd); -} - -/* Adds commas to ULong, right justifying in a field field_width wide, returns - * the string in buf. */ -static -Int commify(ULong n, int field_width, char buf[COMMIFY_BUF_LEN]) -{ - int len, n_commas, i, j, new_len, space; - - VG_(sprintf)(buf, "%lu", n); - len = VG_(strlen)(buf); - n_commas = (len - 1) / 3; - new_len = len + n_commas; - space = field_width - new_len; - - /* Allow for printing a number in a field_width smaller than it's size */ - if (space < 0) space = 0; - - /* Make j = -1 because we copy the '\0' before doing the numbers in groups - * of three. */ - for (j = -1, i = len ; i >= 0; i--) { - buf[i + n_commas + space] = buf[i]; - - if (3 == ++j) { - j = 0; - n_commas--; - buf[i + n_commas + space] = ','; - } - } - /* Right justify in field. 
*/ - for (i = 0; i < space; i++) buf[i] = ' '; - return new_len; -} - -static -void percentify(Int n, Int pow, Int field_width, char buf[]) -{ - int i, len, space; - - VG_(sprintf)(buf, "%d.%d%%", n / pow, n % pow); - len = VG_(strlen)(buf); - space = field_width - len; - i = len; - - /* Right justify in field */ - for ( ; i >= 0; i--) buf[i + space] = buf[i]; - for (i = 0; i < space; i++) buf[i] = ' '; -} - -void VG_(do_cachesim_results)(Int client_argc, Char** client_argv) -{ - CC D_total; - ULong L2_total_m, L2_total_mr, L2_total_mw, - L2_total, L2_total_r, L2_total_w; - char buf1[RESULTS_BUF_LEN], - buf2[RESULTS_BUF_LEN], - buf3[RESULTS_BUF_LEN]; - Int l1, l2, l3; - Int p; - - fprint_BBCC_table_and_calc_totals(client_argc, client_argv); - - if (VG_(clo_verbosity) == 0) - return; - - /* I cache results. Use the I_refs value to determine the first column - * width. */ - l1 = commify(Ir_total.a, 0, buf1); - VG_(message)(Vg_UserMsg, "I refs: %s", buf1); - - commify(Ir_total.m1, l1, buf1); - VG_(message)(Vg_UserMsg, "I1 misses: %s", buf1); - - commify(Ir_total.m2, l1, buf1); - VG_(message)(Vg_UserMsg, "L2i misses: %s", buf1); - - p = 100; - - percentify(Ir_total.m1 * 100 * p / Ir_total.a, p, l1+1, buf1); - VG_(message)(Vg_UserMsg, "I1 miss rate: %s", buf1); - - percentify(Ir_total.m2 * 100 * p / Ir_total.a, p, l1+1, buf1); - VG_(message)(Vg_UserMsg, "L2i miss rate: %s", buf1); - VG_(message)(Vg_UserMsg, ""); - - /* D cache results. Use the D_refs.rd and D_refs.wr values to determine the - * width of columns 2 & 3. 
*/ - D_total.a = Dr_total.a + Dw_total.a; - D_total.m1 = Dr_total.m1 + Dw_total.m1; - D_total.m2 = Dr_total.m2 + Dw_total.m2; - - commify( D_total.a, l1, buf1); - l2 = commify(Dr_total.a, 0, buf2); - l3 = commify(Dw_total.a, 0, buf3); - VG_(message)(Vg_UserMsg, "D refs: %s (%s rd + %s wr)", - buf1, buf2, buf3); - - commify( D_total.m1, l1, buf1); - commify(Dr_total.m1, l2, buf2); - commify(Dw_total.m1, l3, buf3); - VG_(message)(Vg_UserMsg, "D1 misses: %s (%s rd + %s wr)", - buf1, buf2, buf3); - - commify( D_total.m2, l1, buf1); - commify(Dr_total.m2, l2, buf2); - commify(Dw_total.m2, l3, buf3); - VG_(message)(Vg_UserMsg, "L2d misses: %s (%s rd + %s wr)", - buf1, buf2, buf3); - - p = 10; - - percentify( D_total.m1 * 100 * p / D_total.a, p, l1+1, buf1); - percentify(Dr_total.m1 * 100 * p / Dr_total.a, p, l2+1, buf2); - percentify(Dw_total.m1 * 100 * p / Dw_total.a, p, l3+1, buf3); - VG_(message)(Vg_UserMsg, "D1 miss rate: %s (%s + %s )", buf1, buf2,buf3); - - percentify( D_total.m2 * 100 * p / D_total.a, p, l1+1, buf1); - percentify(Dr_total.m2 * 100 * p / Dr_total.a, p, l2+1, buf2); - percentify(Dw_total.m2 * 100 * p / Dw_total.a, p, l3+1, buf3); - VG_(message)(Vg_UserMsg, "L2d miss rate: %s (%s + %s )", buf1, buf2,buf3); - VG_(message)(Vg_UserMsg, ""); - - /* L2 overall results */ - - L2_total = Dr_total.m1 + Dw_total.m1 + Ir_total.m1; - L2_total_r = Dr_total.m1 + Ir_total.m1; - L2_total_w = Dw_total.m1; - commify(L2_total, l1, buf1); - commify(L2_total_r, l2, buf2); - commify(L2_total_w, l3, buf3); - VG_(message)(Vg_UserMsg, "L2 refs: %s (%s rd + %s wr)", - buf1, buf2, buf3); - - L2_total_m = Dr_total.m2 + Dw_total.m2 + Ir_total.m2; - L2_total_mr = Dr_total.m2 + Ir_total.m2; - L2_total_mw = Dw_total.m2; - commify(L2_total_m, l1, buf1); - commify(L2_total_mr, l2, buf2); - commify(L2_total_mw, l3, buf3); - VG_(message)(Vg_UserMsg, "L2 misses: %s (%s rd + %s wr)", - buf1, buf2, buf3); - - percentify(L2_total_m * 100 * p / (Ir_total.a + D_total.a), p, l1+1, buf1); - 
percentify(L2_total_mr * 100 * p / (Ir_total.a + Dr_total.a), p, l2+1, buf2); - percentify(L2_total_mw * 100 * p / Dw_total.a, p, l3+1, buf3); - VG_(message)(Vg_UserMsg, "L2 miss rate: %s (%s + %s )", buf1, buf2,buf3); - - - /* Hash table stats */ - if (VG_(clo_verbosity) > 1) { - int BB_lookups = full_debug_BBs + fn_name_debug_BBs + - file_line_debug_BBs + no_debug_BBs; - - VG_(message)(Vg_DebugMsg, ""); - VG_(message)(Vg_DebugMsg, "Distinct files: %d", distinct_files); - VG_(message)(Vg_DebugMsg, "Distinct fns: %d", distinct_fns); - VG_(message)(Vg_DebugMsg, "BB lookups: %d", BB_lookups); - VG_(message)(Vg_DebugMsg, "With full debug info:%3d%% (%d)", - full_debug_BBs * 100 / BB_lookups, - full_debug_BBs); - VG_(message)(Vg_DebugMsg, "With file/line debug info:%3d%% (%d)", - file_line_debug_BBs * 100 / BB_lookups, - file_line_debug_BBs); - VG_(message)(Vg_DebugMsg, "With fn name debug info:%3d%% (%d)", - fn_name_debug_BBs * 100 / BB_lookups, - fn_name_debug_BBs); - VG_(message)(Vg_DebugMsg, "With no debug info:%3d%% (%d)", - no_debug_BBs * 100 / BB_lookups, - no_debug_BBs); - VG_(message)(Vg_DebugMsg, "BBs Retranslated: %d", BB_retranslations); - VG_(message)(Vg_DebugMsg, "Distinct instrs: %d", distinct_instrs); - } - VGP_POPCC; -} - - -/* Called when a translation is invalidated due to self-modifying code or - * unloaded of a shared object. - * - * Finds the BBCC in the table, removes it, adds the counts to the discard - * counters, and then frees the BBCC. */ -void VG_(cachesim_notify_discard) ( TTEntry* tte ) -{ - BBCC *BBCC_node; - Addr BBCC_ptr0, BBCC_ptr; - Bool BB_seen_before; - - if (0) - VG_(printf)( "cachesim_notify_discard: %p for %d\n", - tte->orig_addr, (Int)tte->orig_size); - - /* 2nd arg won't be used since BB should have been seen before (assertions - * ensure this). 
*/ - BBCC_node = get_BBCC(tte->orig_addr, NULL, True, &BB_seen_before); - BBCC_ptr0 = BBCC_ptr = (Addr)(BBCC_node->array); - - vg_assert(True == BB_seen_before); - - while (BBCC_ptr - BBCC_ptr0 < BBCC_node->array_size) { - - /* We pretend the CC is an iCC for getting the tag. This is ok - * because both CC types have tag as their first byte. Once we know - * the type, we can cast and act appropriately. */ - - switch ( ((iCC*)BBCC_ptr)->tag ) { - - case INSTR_CC: - ADD_CC_TO(iCC, I, Ir_discards); - BBCC_ptr += sizeof(iCC); - break; - - case READ_CC: - case MOD_CC: - ADD_CC_TO(idCC, I, Ir_discards); - ADD_CC_TO(idCC, D, Dr_discards); - BBCC_ptr += sizeof(idCC); - break; - - case WRITE_CC: - ADD_CC_TO(idCC, I, Ir_discards); - ADD_CC_TO(idCC, D, Dw_discards); - BBCC_ptr += sizeof(idCC); - break; - - default: - VG_(panic)("Unknown CC type in VG_(cachesim_notify_discard)()\n"); - break; - } - } - - VG_(free)(VG_AR_PRIVATE, BBCC_node); -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_cachesim.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/cachegrind/cg_sim_D1.c b/cachegrind/cg_sim_D1.c deleted file mode 100644 index 7b8a8da155..0000000000 --- a/cachegrind/cg_sim_D1.c +++ /dev/null @@ -1,38 +0,0 @@ -/*--------------------------------------------------------------------*/ -/*--- D1 cache simulation. ---*/ -/*--- vg_cachesim_D1.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2002 Nicholas Nethercote - njn25@cam.ac.uk - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_cachesim_gen.c" - -CACHESIM(D1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); - -/*--------------------------------------------------------------------*/ -/*--- end vg_cachesim_D1.c ---*/ -/*--------------------------------------------------------------------*/ - diff --git a/cachegrind/cg_sim_I1.c b/cachegrind/cg_sim_I1.c deleted file mode 100644 index 26db3b3488..0000000000 --- a/cachegrind/cg_sim_I1.c +++ /dev/null @@ -1,38 +0,0 @@ -/*--------------------------------------------------------------------*/ -/*--- I1 cache simulation. ---*/ -/*--- vg_cachesim_I1.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2002 Nicholas Nethercote - njn25@cam.ac.uk - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_cachesim_gen.c" - -CACHESIM(I1, { (*m1)++; cachesim_L2_doref(a, size, m1, m2); } ); - -/*--------------------------------------------------------------------*/ -/*--- end vg_cachesim_I1.c ---*/ -/*--------------------------------------------------------------------*/ - diff --git a/cachegrind/cg_sim_L2.c b/cachegrind/cg_sim_L2.c deleted file mode 100644 index ec89027429..0000000000 --- a/cachegrind/cg_sim_L2.c +++ /dev/null @@ -1,38 +0,0 @@ -/*--------------------------------------------------------------------*/ -/*--- L2 cache simulation. ---*/ -/*--- vg_cachesim_L2.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2002 Nicholas Nethercote - njn25@cam.ac.uk - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - -#include "vg_cachesim_gen.c" - -CACHESIM(L2, (*m2)++ ); - -/*--------------------------------------------------------------------*/ -/*--- end vg_cachesim_L2.c ---*/ -/*--------------------------------------------------------------------*/ - diff --git a/cachegrind/cg_sim_gen.c b/cachegrind/cg_sim_gen.c deleted file mode 100644 index f938bc4b0f..0000000000 --- a/cachegrind/cg_sim_gen.c +++ /dev/null @@ -1,212 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Generic stuff shared by all cache simulation files. ---*/ -/*--- vg_cachesim_gen.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2002 Nicholas Nethercote - njn25@cam.ac.uk - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - -/* Notes: - - simulates a write-allocate cache - - (block --> set) hash function uses simple bit selection - - handling of references straddling two cache blocks: - - counts as only one cache access (not two) - - both blocks hit --> one hit - - one block hits, the other misses --> one miss - - both blocks miss --> one miss (not two) -*/ - -#ifndef __VG_CACHESIM_GEN_C -#define __VG_CACHESIM_GEN_C - -typedef struct { - int size; /* bytes */ - int assoc; - int line_size; /* bytes */ - int sets; - int sets_min_1; - int assoc_bits; - int line_size_bits; - int tag_shift; - char desc_line[128]; - int* tags; -} cache_t2; - -/* By this point, the size/assoc/line_size has been checked. */ -static void cachesim_initcache(cache_t config, cache_t2* c) -{ - int i; - - c->size = config.size; - c->assoc = config.assoc; - c->line_size = config.line_size; - - c->sets = (c->size / c->line_size) / c->assoc; - c->sets_min_1 = c->sets - 1; - c->assoc_bits = VG_(log2)(c->assoc); - c->line_size_bits = VG_(log2)(c->line_size); - c->tag_shift = c->line_size_bits + VG_(log2)(c->sets); - - if (c->assoc == 1) { - VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped", - c->size, c->line_size); - } else { - VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative", - c->size, c->line_size, c->assoc); - } - - c->tags = VG_(malloc)(VG_AR_PRIVATE, - sizeof(UInt) * c->sets * c->assoc); - - for (i = 0; i < c->sets * c->assoc; i++) - c->tags[i] = 0; -} - -#if 0 -static void print_cache(cache_t2* c) -{ - UInt set, way, i; - - /* Note initialisation and update of 'i'. */ - for (i = 0, set = 0; set < c->sets; set++) { - for (way = 0; way < c->assoc; way++, i++) { - VG_(printf)("%8x ", c->tags[i]); - } - VG_(printf)("\n"); - } -} -#endif - -/* XXX: This is done as a macro rather than by passing in the cache_t2 as - * an arg because it slows things down by a small amount (3-5%) due to all that - * extra indirection. 
*/ - -#define CACHESIM(L, MISS_TREATMENT) \ -/* The cache and associated bits and pieces. */ \ -static cache_t2 L; \ - \ -static void cachesim_##L##_initcache(cache_t config) \ -{ \ - cachesim_initcache(config, &L); \ -} \ - \ -static __inline__ \ -void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *m2) \ -{ \ - register UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \ - register UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \ - register UInt tag = a >> L.tag_shift; \ - int i, j; \ - Bool is_miss = False; \ - int* set; \ - \ - /* First case: word entirely within line. */ \ - if (set1 == set2) { \ - \ - /* Shifting is a bit faster than multiplying */ \ - set = &(L.tags[set1 << L.assoc_bits]); \ - \ - /* This loop is unrolled for just the first case, which is the most */\ - /* common. We can't unroll any further because it would screw up */\ - /* if we have a direct-mapped (1-way) cache. */\ - if (tag == set[0]) { \ - return; \ - } \ - /* If the tag is one other than the MRU, move it into the MRU spot */\ - /* and shuffle the rest down. */\ - for (i = 1; i < L.assoc; i++) { \ - if (tag == set[i]) { \ - for (j = i; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - return; \ - } \ - } \ - \ - /* A miss; install this tag as MRU, shuffle rest down. */ \ - for (j = L.assoc - 1; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - MISS_TREATMENT; \ - return; \ - \ - /* Second case: word straddles two lines. 
*/ \ - /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \ - } else if (((set1 + 1) & (L.sets-1)) == set2) { \ - set = &(L.tags[set1 << L.assoc_bits]); \ - if (tag == set[0]) { \ - goto block2; \ - } \ - for (i = 1; i < L.assoc; i++) { \ - if (tag == set[i]) { \ - for (j = i; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - goto block2; \ - } \ - } \ - for (j = L.assoc - 1; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - is_miss = True; \ -block2: \ - set = &(L.tags[set2 << L.assoc_bits]); \ - if (tag == set[0]) { \ - goto miss_treatment; \ - } \ - for (i = 1; i < L.assoc; i++) { \ - if (tag == set[i]) { \ - for (j = i; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - goto miss_treatment; \ - } \ - } \ - for (j = L.assoc - 1; j > 0; j--) { \ - set[j] = set[j - 1]; \ - } \ - set[0] = tag; \ - is_miss = True; \ -miss_treatment: \ - if (is_miss) { MISS_TREATMENT; } \ - \ - } else { \ - VG_(panic)("item straddles more than two cache sets"); \ - } \ - return; \ -} - -#endif /* ndef __VG_CACHESIM_GEN_C */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_cachesim_gen.c ---*/ -/*--------------------------------------------------------------------*/ - diff --git a/cachegrind/docs/Makefile.am b/cachegrind/docs/Makefile.am deleted file mode 100644 index e8a58fa18e..0000000000 --- a/cachegrind/docs/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -docdir = $(datadir)/doc/valgrind - -doc_DATA = index.html manual.html nav.html techdocs.html - -EXTRA_DIST = $(doc_DATA) diff --git a/cachegrind/docs/index.html b/cachegrind/docs/index.html deleted file mode 100644 index 1111702565..0000000000 --- a/cachegrind/docs/index.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - Valgrind's user manual - - - - - - - <body> - <p>This page uses frames, but your browser doesn't support them.</p> - </body> - - - - diff --git a/cachegrind/docs/manual.html b/cachegrind/docs/manual.html deleted 
file mode 100644 index b715ee3dfe..0000000000 --- a/cachegrind/docs/manual.html +++ /dev/null @@ -1,2702 +0,0 @@ - - - - Valgrind - - - - -  -

Valgrind, version 1.0.0

-
This manual was last updated on 20020726
-

- -

-jseward@acm.org
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -Linux-x86 executables. -

- -

- -


- -

Contents of this manual

- -

Introduction

- 1.1  What Valgrind is for
- 1.2  What it does with your program - -

How to use it, and how to make sense - of the results

- 2.1  Getting started
- 2.2  The commentary
- 2.3  Reporting of errors
- 2.4  Suppressing errors
- 2.5  Command-line flags
- 2.6  Explanation of error messages
- 2.7  Writing suppressions files
- 2.8  The Client Request mechanism
- 2.9  Support for POSIX pthreads
- 2.10  Building and installing
- 2.11  If you have problems
- -

Details of the checking machinery

- 3.1  Valid-value (V) bits
- 3.2  Valid-address (A) bits
- 3.3  Putting it all together
- 3.4  Signals
- 3.5  Memory leak detection
- -

Limitations

- -

How it works -- a rough overview

- 5.1  Getting started
- 5.2  The translation/instrumentation engine
- 5.3  Tracking the status of memory
- 5.4  System calls
- 5.5  Signals
- -

An example

- -

Cache profiling

- -

The design and implementation of Valgrind

- -
- - -

1  Introduction

- - -

1.1  What Valgrind is for

- -Valgrind is a tool to help you find memory-management problems in your -programs. When a program is run under Valgrind's supervision, all -reads and writes of memory are checked, and calls to -malloc/new/free/delete are intercepted. As a result, Valgrind can -detect problems such as: -
    -
  • Use of uninitialised memory
  • -
  • Reading/writing memory after it has been free'd
  • -
  • Reading/writing off the end of malloc'd blocks
  • -
  • Reading/writing inappropriate areas on the stack
  • -
  • Memory leaks -- where pointers to malloc'd blocks are lost - forever
  • -
  • Mismatched use of malloc/new/new [] vs free/delete/delete - []
  • -
  • Some misuses of the POSIX pthreads API
  • -
- -Problems like these can be difficult to find by other means, often -lying undetected for long periods, then causing occasional, -difficult-to-diagnose crashes. - -

-Valgrind is closely tied to details of the CPU, operating system and, -to a lesser extent, compiler and basic C libraries. This makes it -difficult to make it portable, so I have chosen at the outset to -concentrate on what I believe to be a widely used platform: Linux on -x86s. Valgrind uses the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. This should cover the vast majority of -modern Linux installations. - - -

-Valgrind is licensed under the GNU General Public License, version -2. Read the file LICENSE in the source distribution for details. Some -of the PThreads test cases, test/pth_*.c, are taken from -"Pthreads Programming" by Bradford Nichols, Dick Buttlar & Jacqueline -Proulx Farrell, ISBN 1-56592-115-1, published by O'Reilly & -Associates, Inc. - - - -

1.2  What it does with your program

- -Valgrind is designed to be as non-intrusive as possible. It works -directly with existing executables. You don't need to recompile, -relink, or otherwise modify, the program to be checked. Simply place -the word valgrind at the start of the command line -normally used to run the program. So, for example, if you want to run -the command ls -l on Valgrind, simply issue the -command: valgrind ls -l. - -

Valgrind takes control of your program before it starts. Debugging -information is read from the executable and associated libraries, so -that error messages can be phrased in terms of source code -locations. Your program is then run on a synthetic x86 CPU which -checks every memory access. All detected errors are written to a -log. When the program finishes, Valgrind searches for and reports on -leaked memory. - -

You can run pretty much any dynamically linked ELF x86 executable -using Valgrind. Programs run 25 to 50 times slower, and take a lot -more memory, than they usually would. It works well enough to run -large programs. For example, the Konqueror web browser from the KDE -Desktop Environment, version 3.0, runs slowly but usably on Valgrind. - -

Valgrind simulates every single instruction your program executes. -Because of this, it finds errors not only in your application but also -in all supporting dynamically-linked (.so-format) -libraries, including the GNU C library, the X client libraries, Qt, if -you work with KDE, and so on. That often includes libraries, for -example the GNU C library, which contain memory access violations, but -which you cannot or do not want to fix. - -

Rather than swamping you with errors in which you are not -interested, Valgrind allows you to selectively suppress errors, by -recording them in a suppressions file which is read when Valgrind -starts up. The build mechanism attempts to select suppressions which -give reasonable behaviour for the libc and XFree86 versions detected -on your machine. - - -

Section 6 shows an example of use. -

-


- - -

2  How to use it, and how to make sense of the results

- - -

2.1  Getting started

- -First off, consider whether it might be beneficial to recompile your -application and supporting libraries with optimisation disabled and -debugging info enabled (the -g flag). You don't have to -do this, but doing so helps Valgrind produce more accurate and less -confusing error reports. Chances are you're set up like this already, -if you intended to debug your program with GNU gdb, or some other -debugger. - -

-A plausible compromise is to use -g -O. -Optimisation levels above -O have been observed, on very -rare occasions, to cause gcc to generate code which fools Valgrind's -error tracking machinery into wrongly reporting uninitialised value -errors. -O gets you the vast majority of the benefits of -higher optimisation levels anyway, so you don't lose much there. - -

-Valgrind understands both the older "stabs" debugging format, used by -gcc versions prior to 3.1, and the newer DWARF2 format used by gcc 3.1 -and later. - -

-Then just run your application, but place the word -valgrind in front of your usual command-line invocation. -Note that you should run the real (machine-code) executable here. If -your application is started by, for example, a shell or perl script, -you'll need to modify it to invoke Valgrind on the real executables. -Running such scripts directly under Valgrind will result in you -getting error reports pertaining to /bin/sh, -/usr/bin/perl, or whatever interpreter you're using. -This almost certainly isn't what you want and can be confusing. - - -

2.2  The commentary

- -Valgrind writes a commentary, detailing error reports and other -significant events. The commentary goes to standard error (file -descriptor 2) by default. This may interfere with your program, so you can ask for it -to be directed elsewhere. - -

All lines in the commentary are of the following form:
-

-  ==12345== some-message-from-Valgrind
-
-

The 12345 is the process ID. This scheme makes it easy -to distinguish program output from Valgrind commentary, and also easy -to differentiate commentaries from different processes which have -become merged together, for whatever reason. - -

By default, Valgrind writes only essential messages to the commentary, -so as to avoid flooding you with information of secondary importance. -If you want more information about what is happening, re-run, passing -the -v flag to Valgrind. - - - -

2.3  Reporting of errors

- -When Valgrind detects something bad happening in the program, an error -message is written to the commentary. For example:
-
-  ==25832== Invalid read of size 4
-  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
-  ==25832==    by 0x80487AF: main (bogon.cpp:66)
-  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-
- -

This message says that the program did an illegal 4-byte read of -address 0xBFFFF74C, which, as far as it can tell, is not a valid stack -address, nor corresponds to any currently malloc'd or free'd blocks. -The read is happening at line 45 of bogon.cpp, called -from line 66 of the same file, etc. For errors associated with an -identified malloc'd/free'd block, for example reading free'd memory, -Valgrind reports not only the location where the error happened, but -also where the associated block was malloc'd/free'd. - -

Valgrind remembers all error reports. When an error is detected, -it is compared against old reports, to see if it is a duplicate. If -so, the error is noted, but no further commentary is emitted. This -avoids you being swamped with bazillions of duplicate error reports. - -

If you want to know how many times each error occurred, run with -the -v option. When execution finishes, all the reports -are printed out, along with, and sorted by, their occurrence counts. -This makes it easy to see which errors have occurred most frequently. - -

Errors are reported before the associated operation actually -happens. For example, if your program decides to read from address -zero, Valgrind will emit a message to this effect, and the program -will then duly die with a segmentation fault. - -

In general, you should try and fix errors in the order that they -are reported. Not doing so can be confusing. For example, a program -which copies uninitialised values to several memory locations, and -later uses them, will generate several error messages. The first such -error message may well give the most direct clue to the root cause of -the problem. - -

The process of detecting duplicate errors is quite an expensive -one and can become a significant performance overhead if your program -generates huge quantities of errors. To avoid serious problems here, -Valgrind will simply stop collecting errors after 300 different errors -have been seen, or 30000 errors in total have been seen. In this -situation you might as well stop your program and fix it, because -Valgrind won't tell you anything else useful after this. Note that -the 300/30000 limits apply after suppressed errors are removed. These -limits are defined in vg_include.h and can be increased -if necessary. - -

To avoid this cutoff you can use the ---error-limit=no flag. Then valgrind will always show -errors, regardless of how many there are. Use this flag carefully, -since it may have a dire effect on performance. - - - -

2.4  Suppressing errors

- -Valgrind detects numerous problems in the base libraries, such as the -GNU C library, and the XFree86 client libraries, which come -pre-installed on your GNU/Linux system. You can't easily fix these, -but you don't want to see these errors (and yes, there are many!) So -Valgrind reads a list of errors to suppress at startup. -A default suppression file is cooked up by the -./configure script. - -

You can modify and add to the suppressions file at your leisure, -or, better, write your own. Multiple suppression files are allowed. -This is useful if part of your project contains errors you can't or -don't want to fix, yet you don't want to continuously be reminded of -them. - -

Each error to be suppressed is described very specifically, to -minimise the possibility that a suppression-directive inadvertently -suppresses a bunch of similar errors which you did want to see. The -suppression mechanism is designed to allow precise yet flexible -specification of errors to suppress. - -

If you use the -v flag, at the end of execution, Valgrind -prints out one line for each used suppression, giving its name and the -number of times it got used. Here's the suppressions used by a run of -ls -l: -

-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
-  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
-
- - -

2.5  Command-line flags

- -You invoke Valgrind like this: -
-  valgrind [options-for-Valgrind] your-prog [options for your-prog]
-
- -

Note that Valgrind also reads options from the environment variable -$VALGRIND, and processes them before the command-line -options. - -

Valgrind's default settings succeed in giving reasonable behaviour -in most cases. Available options, in no particular order, are as -follows: -

    -
  • --help

  • - -
  • --version
    -

    The usual deal.


  • - -

  • -v --verbose
    -

    Be more verbose. Gives extra information on various aspects - of your program, such as: the shared objects loaded, the - suppressions used, the progress of the instrumentation engine, - and warnings about unusual behaviour. -


  • - -

  • -q --quiet
    -

    Run silently, and only print error messages. Useful if you - are running regression tests or have some other automated test - machinery. -


  • - -

  • --demangle=no
    - --demangle=yes [the default] -

    Disable/enable automatic demangling (decoding) of C++ names. - Enabled by default. When enabled, Valgrind will attempt to - translate encoded C++ procedure names back to something - approaching the original. The demangler handles symbols mangled - by g++ versions 2.X and 3.X. - -

    An important fact about demangling is that function - names mentioned in suppressions files should be in their mangled - form. Valgrind does not demangle function names when searching - for applicable suppressions, because to do otherwise would make - suppressions file contents dependent on the state of Valgrind's - demangling machinery, and would also be slow and pointless. -


  • - -

  • --num-callers=<number> [default=4]
    -

    By default, Valgrind shows four levels of function call names - to help you identify program locations. You can change that - number with this option. This can help in determining the - program's location in deeply-nested call chains. Note that errors - are commoned up using only the top three function locations (the - place in the current function, and that of its two immediate - callers). So this doesn't affect the total number of errors - reported. -

    - The maximum value for this is 50. Note that higher settings - will make Valgrind run a bit more slowly and take a bit more - memory, but can be useful when working with programs with - deeply-nested call chains. -


  • - -

  • --gdb-attach=no [the default]
    - --gdb-attach=yes -

    When enabled, Valgrind will pause after every error shown, - and print the line -
    - ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- -

    - Pressing Ret, or N Ret - or n Ret, causes Valgrind not to - start GDB for this error. -

    - Y Ret - or y Ret causes Valgrind to - start GDB, for the program at this point. When you have - finished with GDB, quit from it, and the program will continue. - Trying to continue from inside GDB doesn't work. -

    - C Ret - or c Ret causes Valgrind not to - start GDB, and not to ask again. -

    - --gdb-attach=yes conflicts with - --trace-children=yes. You can't use them together. - Valgrind refuses to start up in this situation. 1 May 2002: - this is a historical relic which could be easily fixed if it - gets in your way. Mail me and complain if this is a problem for - you.


  • - -

  • --partial-loads-ok=yes [the default]
    - --partial-loads-ok=no -

    Controls how Valgrind handles word (4-byte) loads from - addresses for which some bytes are addressable and others - are not. When yes (the default), such loads - do not elicit an address error. Instead, the loaded V bytes - corresponding to the illegal addresses indicate undefined, and - those corresponding to legal addresses are loaded from shadow - memory, as usual. -

    - When no, loads from partially - invalid addresses are treated the same as loads from completely - invalid addresses: an illegal-address error is issued, - and the resulting V bytes indicate valid data. -


  • - -

  • --sloppy-malloc=no [the default]
    - --sloppy-malloc=yes -

    When enabled, all requests for malloc/calloc are rounded up - to a whole number of machine words -- in other words, made - divisible by 4. For example, a request for 17 bytes of space - would result in a 20-byte area being made available. This works - around bugs in sloppy libraries which assume that they can - safely rely on malloc/calloc requests being rounded up in this - fashion. Without the workaround, these libraries tend to - generate large numbers of errors when they access the ends of - these areas. -

    - Valgrind snapshots dated 17 Feb 2002 and later are - cleverer about this problem, and you should no longer need to - use this flag. To put it bluntly, if you do need to use this - flag, your program violates the ANSI C semantics defined for - malloc and free, even if it appears to - work correctly, and you should fix it, at least if you hope for - maximum portability. -


  • - -

  • --alignment=<number> [default: 4]

    By - default valgrind's malloc, realloc, - etc, return 4-byte aligned addresses. These are suitable for - any accesses on x86 processors. - Some programs might however assume that malloc et - al return 8-byte (or more) aligned memory. - These programs are broken and should be fixed, but - if this is impossible for whatever reason the alignment can be - increased using this parameter. The supplied value must be - between 4 and 4096 inclusive, and must be a power of two.


  • - -

  • --trace-children=no [the default]
    - --trace-children=yes -

    When enabled, Valgrind will trace into child processes. This - is confusing and usually not what you want, so is disabled by - default. As of 1 May 2002, tracing into a child process from a - parent which uses libpthread.so is probably broken - and is likely to cause breakage. Please report any such - problems to me.


  • - -

  • --freelist-vol=<number> [default: 1000000] -

    When the client program releases memory using free (in C) or - delete (C++), that memory is not immediately made available for - re-allocation. Instead it is marked inaccessible and placed in - a queue of freed blocks. The purpose is to delay the point at - which freed-up memory comes back into circulation. This - increases the chance that Valgrind will be able to detect - invalid accesses to blocks for some significant period of time - after they have been freed. -

    - This flag specifies the maximum total size, in bytes, of the - blocks in the queue. The default value is one million bytes. - Increasing this increases the total amount of memory used by - Valgrind but may detect invalid uses of freed blocks which would - otherwise go undetected.


  • - -

  • --logfile-fd=<number> [default: 2, stderr] -

    Specifies the file descriptor on which Valgrind communicates - all of its messages. The default, 2, is the standard error - channel. This may interfere with the client's own use of - stderr. To dump Valgrind's commentary in a file without using - stderr, something like the following works well (sh/bash - syntax):
    -    - valgrind --logfile-fd=9 my_prog 9> logfile
    - That is: tell Valgrind to send all output to file descriptor 9, - and ask the shell to route file descriptor 9 to "logfile". -


  • - -

  • --suppressions=<filename> - [default: $PREFIX/lib/valgrind/default.supp] -

    Specifies an extra - file from which to read descriptions of errors to suppress. You - may use as many extra suppressions files as you - like.


  • - -

  • --leak-check=no [default]
    - --leak-check=yes -

    When enabled, search for memory leaks when the client program - finishes. A memory leak means a malloc'd block, which has not - yet been free'd, but to which no pointer can be found. Such a - block can never be free'd by the program, since no pointer to it - exists. Leak checking is disabled by default because it tends - to generate dozens of error messages.


  • - -

  • --show-reachable=no [default]
    - --show-reachable=yes -

    When disabled, the memory leak detector only shows blocks for - which it cannot find a pointer to at all, or it can only find a - pointer to the middle of. These blocks are prime candidates for - memory leaks. When enabled, the leak detector also reports on - blocks which it could find a pointer to. Your program could, at - least in principle, have freed such blocks before exit. - Contrast this to blocks for which no pointer, or only an - interior pointer could be found: they are more likely to - indicate memory leaks, because you do not actually have a - pointer to the start of the block which you can hand to - free, even if you wanted to.


  • - -

  • --leak-resolution=low [default]
    - --leak-resolution=med
    - --leak-resolution=high -

    When doing leak checking, determines how willing Valgrind is - to consider different backtraces to be the same. When set to - low, the default, only the first two entries need - match. When med, four entries have to match. When - high, all entries need to match. -

    - For hardcore leak debugging, you probably want to use - --leak-resolution=high together with - --num-callers=40 or some such large number. Note - however that this can give an overwhelming amount of - information, which is why the defaults are 4 callers and - low-resolution matching. -

    - Note that the --leak-resolution= setting does not - affect Valgrind's ability to find leaks. It only changes how - the results are presented. -


  • - -

  • --workaround-gcc296-bugs=no [default]
    - --workaround-gcc296-bugs=yes

    When enabled, - assumes that reads and writes some small distance below the stack - pointer %esp are due to bugs in gcc 2.96, and does - not report them. The "small distance" is 256 bytes by default. - Note that gcc 2.96 is the default compiler on some popular Linux - distributions (RedHat 7.X, Mandrake) and so you may well need to - use this flag. Do not use it if you do not have to, as it can - cause real errors to be overlooked. Another option is to use a - gcc/g++ which does not generate accesses below the stack - pointer. 2.95.3 seems to be a good choice in this respect. -

    - Unfortunately (27 Feb 02) it looks like g++ 3.0.4 has a similar - bug, so you may need to issue this flag if you use 3.0.4. A - while later (early Apr 02) this is confirmed as a scheduling bug - in g++-3.0.4. -


  • - -

  • --error-limit=yes [default]
    - --error-limit=no

    When enabled, valgrind stops - reporting errors after 30000 in total, or 300 different ones, - have been seen. This is to stop the error tracking machinery - from becoming a huge performance overhead in programs with many - errors.


  • - -

  • --cachesim=no [default]
    - --cachesim=yes

    When enabled, turns off memory - checking, and turns on cache profiling. Cache profiling is - described in detail in Section 7. -


  • - -

  • --weird-hacks=hack1,hack2,... - Pass miscellaneous hints to Valgrind which slightly modify the - simulated behaviour in nonstandard or dangerous ways, possibly - to help the simulation of strange features. By default no hacks - are enabled. Use with caution! Currently known hacks are: -

    -

      -
    • ioctl-VTIME Use this if you have a program - which sets readable file descriptors to have a timeout by - doing ioctl on them with a - TCSETA-style command and a non-zero - VTIME timeout value. This is considered - potentially dangerous and therefore is not engaged by - default, because it is (remotely) conceivable that it could - cause threads doing read to incorrectly block - the entire process. -

      - You probably want to try this one if you have a program - which unexpectedly blocks in a read from a file - descriptor which you know to have been messed with by - ioctl. This could happen, for example, if the - descriptor is used to read input from some kind of screen - handling library. -

      - To find out if your program is blocking unexpectedly in the - read system call, run with the - --trace-syscalls=yes flag. -

      -

    • truncate-writes Use this if you have a threaded - program which appears to unexpectedly block whilst writing - into a pipe. The effect is to modify all calls to - write() so that requests to write more than - 4096 bytes are treated as if they only requested a write of - 4096 bytes. Valgrind does this by changing the - count argument of write(), as - passed to the kernel, so that it is at most 4096. The - amount of data written will then be less than the client - program asked for, but the client should have a loop around - its write() call to check whether the requested - number of bytes have been written. If not, it should issue - further write() calls until all the data is - written. -

      - This all sounds pretty dodgy to me, which is why I've made - this behaviour only happen on request. It is not the - default behaviour. At the time of writing this (30 June - 2002) I have only seen one example where this is necessary, - so either the problem is extremely rare or nobody is using - Valgrind :-) -

      - On experimentation I see that truncate-writes - doesn't interact well with ioctl-VTIME, so you - probably don't want to try both at once. -

      - As above, to find out if your program is blocking - unexpectedly in the write() system call, you - may find the --trace-syscalls=yes - --trace-sched=yes flags useful. -

    - -
  • -

- -There are also some options for debugging Valgrind itself. You -shouldn't need to use them in the normal run of things. Nevertheless: - -
    - -
  • --single-step=no [default]
    - --single-step=yes -

    When enabled, each x86 insn is translated separately into - instrumented code. When disabled, translation is done on a - per-basic-block basis, giving much better translations.


  • -

    - -

  • --optimise=no
    - --optimise=yes [default] -

    When enabled, various improvements are applied to the - intermediate code, mainly aimed at allowing the simulated CPU's - registers to be cached in the real CPU's registers over several - simulated instructions.


  • -

    - -

  • --instrument=no
    - --instrument=yes [default] -

    When disabled, the translations don't actually contain any - instrumentation.


  • -

    - -

  • --cleanup=no
    - --cleanup=yes [default] -

    When enabled, various improvements are applied to the - post-instrumented intermediate code, aimed at removing redundant - value checks.


  • -

    - -

  • --trace-syscalls=no [default]
    - --trace-syscalls=yes -

    Enable/disable tracing of system call intercepts.


  • -

    - -

  • --trace-signals=no [default]
    - --trace-signals=yes -

    Enable/disable tracing of signal handling.


  • -

    - -

  • --trace-sched=no [default]
    - --trace-sched=yes -

    Enable/disable tracing of thread scheduling events.


  • -

    - -

  • --trace-pthread=none [default]
    - --trace-pthread=some
    - --trace-pthread=all -

    Specifies amount of trace detail for pthread-related events.


  • -

    - -

  • --trace-symtab=no [default]
    - --trace-symtab=yes -

    Enable/disable tracing of symbol table reading.


  • -

    - -

  • --trace-malloc=no [default]
    - --trace-malloc=yes -

    Enable/disable tracing of malloc/free (et al) intercepts. -


  • -

    - -

  • --stop-after=<number> - [default: infinity, more or less] -

    After <number> basic blocks have been executed, shut down - Valgrind and switch back to running the client on the real CPU. -


  • -

    - -

  • --dump-error=<number> [default: inactive] -

    After the program has exited, show gory details of the - translation of the basic block containing the <number>'th - error context. When used with --single-step=yes, - can show the exact x86 instruction causing an error. This is - all fairly dodgy and doesn't work at all if threads are - involved.


  • -

    -

- - - -

2.6  Explanation of error messages

- -Despite considerable sophistication under the hood, Valgrind can only -really detect two kinds of errors, use of illegal addresses, and use -of undefined values. Nevertheless, this is enough to help you -discover all sorts of memory-management nasties in your code. This -section presents a quick summary of what error messages mean. The -precise behaviour of the error-checking machinery is described in -Section 4. - - -

2.6.1  Illegal read / Illegal write errors

-For example: -
-  Invalid read of size 4
-     at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
-     by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
-     Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
-
- -

This happens when your program reads or writes memory at a place -which Valgrind reckons it shouldn't. In this example, the program did -a 4-byte read at address 0xBFFFF0E0, somewhere within the -system-supplied library libpng.so.2.1.0.9, which was called from -somewhere else in the same library, called from line 326 of -qpngio.cpp, and so on. - -

Valgrind tries to establish what the illegal address might relate -to, since that's often useful. So, if it points into a block of -memory which has already been freed, you'll be informed of this, and -also where the block was free'd at. Likewise, if it should turn out -to be just off the end of a malloc'd block, a common result of -off-by-one-errors in array subscripting, you'll be informed of this -fact, and also where the block was malloc'd. - -

In this example, Valgrind can't identify the address. Actually the -address is on the stack, but, for some reason, this is not a valid -stack address -- it is below the stack pointer, %esp, and that isn't -allowed. In this particular case it's probably caused by gcc -generating invalid code, a known bug in various flavours of gcc. - -

Note that Valgrind only tells you that your program is about to -access memory at an illegal address. It can't stop the access from -happening. So, if your program makes an access which normally would -result in a segmentation fault, your program will still suffer the same -fate -- but you will get a message from Valgrind immediately prior to -this. In this particular example, reading junk on the stack is -non-fatal, and the program stays alive. - - -

2.6.2  Use of uninitialised values

-For example: -
-  Conditional jump or move depends on uninitialised value(s)
-     at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
-     by 0x402E8476: _IO_printf (printf.c:36)
-     by 0x8048472: main (tests/manuel1.c:8)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-
- -

An uninitialised-value use error is reported when your program uses -a value which hasn't been initialised -- in other words, is undefined. -Here, the undefined value is used somewhere inside the printf() -machinery of the C library. This error was reported when running the -following small program: -

-  int main()
-  {
-    int x;
-    printf ("x = %d\n", x);
-  }
-
- -

It is important to understand that your program can copy around -junk (uninitialised) data to its heart's content. Valgrind observes -this and keeps track of the data, but does not complain. A complaint -is issued only when your program attempts to make use of uninitialised -data. In this example, x is uninitialised. Valgrind observes the -value being passed to _IO_printf and thence to _IO_vfprintf, but makes -no comment. However, _IO_vfprintf has to examine the value of x so it -can turn it into the corresponding ASCII string, and it is at this -point that Valgrind complains. - -

Sources of uninitialised data tend to be: -

    -
  • Local variables in procedures which have not been initialised, - as in the example above.

  • - -

  • The contents of malloc'd blocks, before you write something - there. In C++, the new operator is a wrapper round malloc, so - if you create an object with new, its fields will be - uninitialised until you fill them in, which is only Right and - Proper.
  • -
- - - -

2.6.3  Illegal frees

-For example: -
-  Invalid free()
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-     Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-
-

Valgrind keeps track of the blocks allocated by your program with -malloc/new, so it can know exactly whether or not the argument to -free/delete is legitimate. Here, this test program has -freed the same block twice. As with the illegal read/write errors, -Valgrind attempts to make sense of the address free'd. If, as -here, the address is one which has previously been freed, you will -be told that -- making duplicate frees of the same block easy to spot. - - -

2.6.4  When a block is freed with an inappropriate -deallocation function

-In the following example, a block allocated with new[] -has wrongly been deallocated with free: -
-  Mismatched free() / delete / delete []
-     at 0x40043249: free (vg_clientfuncs.c:171)
-     by 0x4102BB4E: QGArray::~QGArray(void) (tools/qgarray.cpp:149)
-     by 0x4C261C41: PptDoc::~PptDoc(void) (include/qmemarray.h:60)
-     by 0x4C261F0E: PptXml::~PptXml(void) (pptxml.cc:44)
-     Address 0x4BB292A8 is 0 bytes inside a block of size 64 alloc'd
-     at 0x4004318C: __builtin_vec_new (vg_clientfuncs.c:152)
-     by 0x4C21BC15: KLaola::readSBStream(int) const (klaola.cc:314)
-     by 0x4C21C155: KLaola::stream(KLaola::OLENode const *) (klaola.cc:416)
-     by 0x4C21788F: OLEFilter::convert(QCString const &) (olefilter.cc:272)
-
-The following was told to me by the KDE 3 developers. I didn't know -any of it myself. They also implemented the check itself. -

-In C++ it's important to deallocate memory in a way compatible with -how it was allocated. The deal is: -

    -
  • If allocated with malloc, calloc, - realloc, valloc or - memalign, you must deallocate with free. -
  • If allocated with new[], you must deallocate with - delete[]. -
  • If allocated with new, you must deallocate with - delete. -
-The worst thing is that on Linux apparently it doesn't matter if you -do muddle these up, and it all seems to work ok, but the same program -may then crash on a different platform, Solaris for example. So it's -best to fix it properly. According to the KDE folks "it's amazing how -many C++ programmers don't know this". -

-Pascal Massimino adds the following clarification: -delete[] must be called associated with a -new[] because the compiler stores the size of the array -and the pointer-to-member to the destructor of the array's content -just before the pointer actually returned. This implies a -variable-sized overhead in what's returned by new or -new[]. It is rather surprising how compilers [Ed: -runtime-support libraries?] are robust to mismatch in -new/delete -new[]/delete[]. - - -

2.6.5  Passing system call parameters with inadequate -read/write permissions

- -Valgrind checks all parameters to system calls. If a system call -needs to read from a buffer provided by your program, Valgrind checks -that the entire buffer is addressable and has valid data, ie, it is -readable. And if the system call needs to write to a user-supplied -buffer, Valgrind checks that the buffer is addressable. After the -system call, Valgrind updates its administrative information to -precisely reflect any changes in memory permissions caused by the -system call. - -

Here's an example of a system call with an invalid parameter: -

-  #include <stdlib.h>
-  #include <unistd.h>
-  int main( void )
-  {
-    char* arr = malloc(10);
-    (void) write( 1 /* stdout */, arr, 10 );
-    return 0;
-  }
-
- -

You get this complaint ... -

-  Syscall param write(buf) contains uninitialised or unaddressable byte(s)
-     at 0x4035E072: __libc_write
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-     by <bogus frame pointer> ???
-     Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
-     at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
-     by 0x80484A0: main (tests/badwrite.c:6)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-
- -

... because the program has tried to write uninitialised junk from -the malloc'd block to the standard output. - - -

2.6.6  Warning messages you might see

- -Most of these only appear if you run in verbose mode (enabled by --v): -
    -
  • More than 50 errors detected. Subsequent errors - will still be recorded, but in less detail than before. -
    - After 50 different errors have been shown, Valgrind becomes - more conservative about collecting them. It then requires only - the program counters in the top two stack frames to match when - deciding whether or not two errors are really the same one. - Prior to this point, the PCs in the top four frames are required - to match. This hack has the effect of slowing down the - appearance of new errors after the first 50. The 50 constant can - be changed by recompiling Valgrind. -

    -

  • More than 300 errors detected. I'm not reporting any more. - Final error counts may be inaccurate. Go fix your - program! -
    - After 300 different errors have been detected, Valgrind ignores - any more. It seems unlikely that collecting even more different - ones would be of practical help to anybody, and it avoids the - danger that Valgrind spends more and more of its time comparing - new errors against an ever-growing collection. As above, the 300 - number is a compile-time constant. -

    -

  • Warning: client switching stacks? -
    - Valgrind spotted such a large change in the stack pointer, %esp, - that it guesses the client is switching to a different stack. - At this point it makes a kludgey guess where the base of the new - stack is, and sets memory permissions accordingly. You may get - many bogus error messages following this, if Valgrind guesses - wrong. At the moment "large change" is defined as a change of - more than 2000000 in the value of the %esp (stack pointer) - register. -

    -

  • Warning: client attempted to close Valgrind's logfile fd <number> - -
    - Valgrind doesn't allow the client - to close the logfile, because you'd never see any diagnostic - information after that point. If you see this message, - you may want to use the --logfile-fd=<number> - option to specify a different logfile file-descriptor number. -

    -

  • Warning: noted but unhandled ioctl <number> -
    - Valgrind observed a call to one of the vast family of - ioctl system calls, but did not modify its - memory status info (because I have not yet got round to it). - The call will still have gone through, but you may get spurious - errors after this as a result of the non-update of the memory info. -

    -

  • Warning: set address range perms: large range <number> -
    - Diagnostic message, mostly for my benefit, to do with memory - permissions. -
- - - -

2.7  Writing suppressions files

- -A suppression file describes a bunch of errors which, for one reason -or another, you don't want Valgrind to tell you about. Usually the -reason is that the system libraries are buggy but unfixable, at least -within the scope of the current debugging session. Multiple -suppressions files are allowed. By default, Valgrind uses -$PREFIX/lib/valgrind/default.supp. - -

-You can ask to add suppressions from another file, by specifying ---suppressions=/path/to/file.supp. - -

Each suppression has the following components:
-

    - -
  • Its name. This merely gives a handy name to the suppression, by - which it is referred to in the summary of used suppressions - printed out when a program finishes. It's not important what - the name is; any identifying string will do. -

    - -

  • The nature of the error to suppress. Either: - Value1, - Value2, - Value4 or - Value8, - meaning an uninitialised-value error when - using a value of 1, 2, 4 or 8 bytes. - Or - Cond (or its old name, Value0), - meaning use of an uninitialised CPU condition code. Or: - Addr1, - Addr2, - Addr4 or - Addr8, meaning an invalid address during a - memory access of 1, 2, 4 or 8 bytes respectively. Or - Param, - meaning an invalid system call parameter error. Or - Free, meaning an invalid or mismatching free. - Or PThread, meaning any kind of complaint to do - with the PThreads API.

  • -

    - -

  • The "immediate location" specification. For Value and Addr - errors, is either the name of the function in which the error - occurred, or, failing that, the full path to the .so file - containing the error location. For Param errors, is the name of - the offending system call parameter. For Free errors, is the - name of the function doing the freeing (eg, free, - __builtin_vec_delete, etc)

  • -

    - -

  • The caller of the above "immediate location". Again, either a - function or shared-object name.

  • -

    - -

  • Optionally, one or two extra calling-function or object names, - for greater precision.
  • -
- -

-Locations may be either names of shared objects or wildcards matching -function names. They begin obj: and fun: -respectively. Function and object names to match against may use the -wildcard characters * and ?. - -A suppression only suppresses an error when the error matches all the -details in the suppression. Here's an example: -

-  {
-    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
-    Value4
-    fun:__gconv_transform_ascii_internal
-    fun:__mbr*toc
-    fun:mbtowc
-  }
-
- -

What this means is: suppress a use-of-uninitialised-value error, when -the data size is 4, when it occurs in the function -__gconv_transform_ascii_internal, when that is called -from any function of name matching __mbr*toc, -when that is called from -mbtowc. It doesn't apply under any other circumstances. -The string by which this suppression is identified to the user is -__gconv_transform_ascii_internal/__mbrtowc/mbtowc. - -

Another example: -

-  {
-    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
-    Value4
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libXaw.so.7.0
-  }
-
- -

Suppress any size 4 uninitialised-value error which occurs anywhere -in libX11.so.6.2, when called from anywhere in the same -library, when called from anywhere in libXaw.so.7.0. The -inexact specification of locations is regrettable, but is about all -you can hope for, given that the X11 libraries shipped with Red Hat -7.2 have had their symbol tables removed. - -

Note -- since the above two examples did not make it clear -- that -you can freely mix the obj: and fun: -styles of description within a single suppression record. - - - -

2.8  The Client Request mechanism

- -Valgrind has a trapdoor mechanism via which the client program can -pass all manner of requests and queries to Valgrind. Internally, this -is used extensively to make malloc, free, signals, threads, etc, work, -although you don't see that. -

-For your convenience, a subset of these so-called client requests is -provided to allow you to tell Valgrind facts about the behaviour of -your program, and conversely to make queries. In particular, your -program can tell Valgrind about changes in memory range permissions -that Valgrind would not otherwise know about, and so allows clients to -get Valgrind to do arbitrary custom checks. -

-Clients need to include the header file valgrind.h to -make this work. The macros therein have the magical property that -they generate code in-line which Valgrind can spot. However, the code -does nothing when not run on Valgrind, so you are not forced to run -your program on Valgrind just because you use the macros in this file. -Also, you are not required to link your program with any extra -supporting libraries. -

-A brief description of the available macros: -

    -
  • VALGRIND_MAKE_NOACCESS, - VALGRIND_MAKE_WRITABLE and - VALGRIND_MAKE_READABLE. These mark address - ranges as completely inaccessible, accessible but containing - undefined data, and accessible and containing defined data, - respectively. Subsequent errors may have their faulting - addresses described in terms of these blocks. Returns a - "block handle". Returns zero when not run on Valgrind. -

    -

  • VALGRIND_DISCARD: At some point you may want - Valgrind to stop reporting errors in terms of the blocks - defined by the previous three macros. To do this, the above - macros return a small-integer "block handle". You can pass - this block handle to VALGRIND_DISCARD. After - doing so, Valgrind will no longer be able to relate - addressing errors to the user-defined block associated with - the handle. The permissions settings associated with the - handle remain in place; this just affects how errors are - reported, not whether they are reported. Returns 1 for an - invalid handle and 0 for a valid handle (although passing - invalid handles is harmless). Always returns 0 when not run - on Valgrind. -

    -

  • VALGRIND_CHECK_NOACCESS, - VALGRIND_CHECK_WRITABLE and - VALGRIND_CHECK_READABLE: check immediately - whether or not the given address range has the relevant - property, and if not, print an error message. Also, for the - convenience of the client, returns zero if the relevant - property holds; otherwise, the returned value is the address - of the first byte for which the property is not true. - Always returns 0 when not run on Valgrind. -

    -

  • VALGRIND_CHECK_DEFINED: a quick and easy way - to find out whether Valgrind thinks a particular variable - (lvalue, to be precise) is addressible and defined. Prints - an error message if not. Returns no value. -

    -

  • VALGRIND_MAKE_NOACCESS_STACK: a highly - experimental feature. Similarly to - VALGRIND_MAKE_NOACCESS, this marks an address - range as inaccessible, so that subsequent accesses to an - address in the range give an error. However, this macro - does not return a block handle. Instead, all annotations - created like this are reviewed at each client - ret (subroutine return) instruction, and those - which now define an address range below the client's stack - pointer register (%esp) are automatically - deleted. -

    - In other words, this macro allows the client to tell - Valgrind about red-zones on its own stack. Valgrind - automatically discards this information when the stack - retreats past such blocks. Beware: hacky and flaky, and - probably interacts badly with the new pthread support. -

    -

  • RUNNING_ON_VALGRIND: returns 1 if running on - Valgrind, 0 if running on the real CPU. -

    -

  • VALGRIND_DO_LEAK_CHECK: run the memory leak detector - right now. Returns no value. I guess this could be used to - incrementally check for leaks between arbitrary places in the - program's execution. Warning: not properly tested! -

    -

  • VALGRIND_DISCARD_TRANSLATIONS: discard translations - of code in the specified address range. Useful if you are - debugging a JITter or some other dynamic code generation system. - After this call, attempts to execute code in the invalidated - address range will cause valgrind to make new translations of that - code, which is probably the semantics you want. Note that this is - implemented naively, and involves checking all 200191 entries in - the translation table to see if any of them overlap the specified - address range. So try not to call it often, or performance will - nosedive. Note that you can be clever about this: you only need - to call it when an area which previously contained code is - overwritten with new code. You can choose to write code into - fresh memory, and just call this occasionally to discard large - chunks of old code all at once. -

    - Warning: minimally tested, especially for the cache simulator. -

-

- - - -

2.9  Support for POSIX Pthreads

- -As of late April 02, Valgrind supports programs which use POSIX -pthreads. Doing this has proved technically challenging but is now -mostly complete. It works well enough for significant threaded -applications to work. -

-It works as follows: threaded apps are (dynamically) linked against -libpthread.so. Usually this is the one installed with -your Linux distribution. Valgrind, however, supplies its own -libpthread.so and automatically connects your program to -it instead. -

-The fake libpthread.so and Valgrind cooperate to -implement a user-space pthreads package. This approach avoids the -horrible implementation problems of implementing a truly -multiprocessor version of Valgrind, but it does mean that threaded -apps run only on one CPU, even if you have a multiprocessor machine. -

-Valgrind schedules your threads in a round-robin fashion, with all -threads having equal priority. It switches threads every 50000 basic -blocks (typically around 300000 x86 instructions), which means you'll -get a much finer interleaving of thread executions than when run -natively. This in itself may cause your program to behave differently -if you have some kind of concurrency, critical race, locking, or -similar, bugs. -

-The current (valgrind-1.0 release) state of pthread support is as -follows: -

    -
  • Mutexes, condition variables, thread-specific data, - pthread_once, reader-writer locks, semaphores, - cleanup stacks, cancellation and thread detaching currently work. - Various attribute-like calls are handled but ignored; you get a - warning message. -

    -

  • Currently the following syscalls are thread-safe (nonblocking): - write read nanosleep - sleep select poll - recvmsg and - accept. -

    -

  • Signals in pthreads are now handled properly(ish): - pthread_sigmask, pthread_kill, - sigwait and raise are now implemented. - Each thread has its own signal mask, as POSIX requires. - It's a bit kludgey -- there's a system-wide pending signal set, - rather than one for each thread. But hey. -
- - -As of 18 May 02, the following threaded programs now work fine on my -RedHat 7.2 box: Opera 6.0Beta2, KNode in KDE 3.0, Mozilla-0.9.2.1 and -Galeon-0.11.3, both as supplied with RedHat 7.2. Also Mozilla 1.0RC2. -OpenOffice 1.0. MySQL 3.something (the current stable release). - - -

2.10  Building and installing

- -We now use the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. I don't think there is much else to say. -There are no options apart from the usual --prefix that -you should give to ./configure. - -

-The configure script tests the version of the X server -currently indicated by the current $DISPLAY. This is a -known bug. The intention was to detect the version of the current -XFree86 client libraries, so that correct suppressions could be -selected for them, but instead the test checks the server version. -This is just plain wrong. - -

-If you are building a binary package of Valgrind for distribution, -please read README_PACKAGERS. It contains some important -information. - -

-Apart from that there is no excitement here. Let me know if you have -build problems. - - - - -

2.11  If you have problems

-Mail me (jseward@acm.org). - -

See Section 4 for the known limitations of -Valgrind, and for a list of programs which are known not to work on -it. - -

The translator/instrumentor has a lot of assertions in it. They -are permanently enabled, and I have no plans to disable them. If one -of these breaks, please mail me! - -

If you get an assertion failure on the expression -chunkSane(ch) in vg_free() in -vg_malloc.c, this may have happened because your program -wrote off the end of a malloc'd block, or before its beginning. -Valgrind should have emitted a proper message to that effect before -dying in this way. This is a known problem which I should fix. -

- -


- - -

3  Details of the checking machinery

- -Read this section if you want to know, in detail, exactly what and how -Valgrind is checking. - - -

3.1  Valid-value (V) bits

- -It is simplest to think of Valgrind implementing a synthetic Intel x86 -CPU which is identical to a real CPU, except for one crucial detail. -Every bit (literally) of data processed, stored and handled by the -real CPU has, in the synthetic CPU, an associated "valid-value" bit, -which says whether or not the accompanying bit has a legitimate value. -In the discussions which follow, this bit is referred to as the V -(valid-value) bit. - -

Each byte in the system therefore has 8 V bits which follow -it wherever it goes. For example, when the CPU loads a word-size item -(4 bytes) from memory, it also loads the corresponding 32 V bits from -a bitmap which stores the V bits for the process' entire address -space. If the CPU should later write the whole or some part of that -value to memory at a different address, the relevant V bits will be -stored back in the V-bit bitmap. - -

In short, each bit in the system has an associated V bit, which -follows it around everywhere, even inside the CPU. Yes, the CPU's -(integer and %eflags) registers have their own V bit -vectors. - -

Copying values around does not cause Valgrind to check for, or -report on, errors. However, when a value is used in a way which might -conceivably affect the outcome of your program's computation, the -associated V bits are immediately checked. If any of these indicate -that the value is undefined, an error is reported. - -

Here's an (admittedly nonsensical) example: -

-  int i, j;
-  int a[10], b[10];
-  for (i = 0; i < 10; i++) {
-    j = a[i];
-    b[i] = j;
-  }
-
- -

Valgrind emits no complaints about this, since it merely copies -uninitialised values from a[] into b[], and -doesn't use them in any way. However, if the loop is changed to -

-  for (i = 0; i < 10; i++) {
-    j += a[i];
-  }
-  if (j == 77) 
-     printf("hello there\n");
-
-then Valgrind will complain, at the if, that the -condition depends on uninitialised values. - -

Most low level operations, such as adds, cause Valgrind to -use the V bits for the operands to calculate the V bits for the -result. Even if the result is partially or wholly undefined, -it does not complain. - -

Checks on definedness only occur in two places: when a value is -used to generate a memory address, and where a control flow decision -needs to be made. Also, when a system call is detected, valgrind -checks definedness of parameters as required. - -

If a check should detect undefinedness, an error message is -issued. The resulting value is subsequently regarded as well-defined. -To do otherwise would give long chains of error messages. In effect, -we say that undefined values are non-infectious. - -

This sounds overcomplicated. Why not just check all reads from -memory, and complain if an undefined value is loaded into a CPU register? -Well, that doesn't work well, because perfectly legitimate C programs routinely -copy uninitialised values around in memory, and we don't want endless complaints -about that. Here's the canonical example. Consider a struct -like this: -

-  struct S { int x; char c; };
-  struct S s1, s2;
-  s1.x = 42;
-  s1.c = 'z';
-  s2 = s1;
-
- -

The question to ask is: how large is struct S, in -bytes? An int is 4 bytes and a char one byte, so perhaps a struct S -occupies 5 bytes? Wrong. All (non-toy) compilers I know of will -round the size of struct S up to a whole number of words, -in this case 8 bytes. Not doing this forces compilers to generate -truly appalling code for subscripting arrays of struct -S's. - -

So s1 occupies 8 bytes, yet only 5 of them will be initialised. -For the assignment s2 = s1, gcc generates code to copy -all 8 bytes wholesale into s2 without regard for their -meaning. If Valgrind simply checked values as they came out of -memory, it would yelp every time a structure assignment like this -happened. So the more complicated semantics described above is -necessary. This allows gcc to copy s1 into -s2 any way it likes, and a warning will only be emitted -if the uninitialised values are later used. - -

One final twist to this story. The above scheme allows garbage to -pass through the CPU's integer registers without complaint. It does -this by giving the integer registers V tags, passing these around in -the expected way. This is complicated and computationally expensive to -do, but is necessary. Valgrind is more simplistic about -floating-point loads and stores. In particular, V bits for data read -as a result of floating-point loads are checked at the load -instruction. So if your program uses the floating-point registers to -do memory-to-memory copies, you will get complaints about -uninitialised values. Fortunately, I have not yet encountered a -program which (ab)uses the floating-point registers in this way. - - -

3.2  Valid-address (A) bits

- -Notice that the previous section describes how the validity of values -is established and maintained without having to say whether the -program does or does not have the right to access any particular -memory location. We now consider the latter issue. - -

As described above, every bit in memory or in the CPU has an -associated valid-value (V) bit. In addition, all bytes in memory, but -not in the CPU, have an associated valid-address (A) bit. This -indicates whether or not the program can legitimately read or write -that location. It does not give any indication of the validity of the -data at that location -- that's the job of the V bits -- only whether -or not the location may be accessed. - -

Every time your program reads or writes memory, Valgrind checks the -A bits associated with the address. If any of them indicate an -invalid address, an error is emitted. Note that the reads and writes -themselves do not change the A bits, only consult them. - -

So how do the A bits get set/cleared? Like this: - -

    -
  • When the program starts, all the global data areas are marked as - accessible.

  • -

    - -

  • When the program does malloc/new, the A bits for exactly the - area allocated, and not a byte more, are marked as accessible. - Upon freeing the area the A bits are changed to indicate - inaccessibility.

  • -

    - -

  • When the stack pointer register (%esp) moves up or down, A bits - are set. The rule is that the area from %esp up to the base of - the stack is marked as accessible, and below %esp is - inaccessible. (If that sounds illogical, bear in mind that the - stack grows down, not up, on almost all Unix systems, including - GNU/Linux.) Tracking %esp like this has the useful side-effect - that the section of stack used by a function for local variables - etc is automatically marked accessible on function entry and - inaccessible on exit.

  • -

    - -

  • When doing system calls, A bits are changed appropriately. For - example, mmap() magically makes files appear in the process's - address space, so the A bits must be updated if mmap() - succeeds.

  • -

    - -

  • Optionally, your program can tell Valgrind about such changes - explicitly, using the client request mechanism described above. -
- - - -

3.3  Putting it all together

-Valgrind's checking machinery can be summarised as follows: - -
    -
  • Each byte in memory has 8 associated V (valid-value) bits, - saying whether or not the byte has a defined value, and a single - A (valid-address) bit, saying whether or not the program - currently has the right to read/write that address.

  • -

    - -

  • When memory is read or written, the relevant A bits are - consulted. If they indicate an invalid address, Valgrind emits - an Invalid read or Invalid write error.

  • -

    - -

  • When memory is read into the CPU's integer registers, the - relevant V bits are fetched from memory and stored in the - simulated CPU. They are not consulted.

  • -

    - -

  • When an integer register is written out to memory, the V bits - for that register are written back to memory too.

  • -

    - -

  • When memory is read into the CPU's floating point registers, the - relevant V bits are read from memory and they are immediately - checked. If any are invalid, an uninitialised value error is - emitted. This precludes using the floating-point registers to - copy possibly-uninitialised memory, but simplifies Valgrind in - that it does not have to track the validity status of the - floating-point registers.

  • -

    - -

  • As a result, when a floating-point register is written to - memory, the associated V bits are set to indicate a valid - value.

  • -

    - -

  • When values in integer CPU registers are used to generate a - memory address, or to determine the outcome of a conditional - branch, the V bits for those values are checked, and an error - emitted if any of them are undefined.

  • -

    - -

  • When values in integer CPU registers are used for any other - purpose, Valgrind computes the V bits for the result, but does - not check them.

  • -

    - -

  • Once the V bits for a value in the CPU have been checked, they - are then set to indicate validity. This avoids long chains of - errors.

  • -

    - -

  • When values are loaded from memory, valgrind checks the A bits - for that location and issues an illegal-address warning if - needed. In that case, the V bits loaded are forced to indicate - Valid, despite the location being invalid. -

    - This apparently strange choice reduces the amount of confusing - information presented to the user. It avoids the - unpleasant phenomenon in which memory is read from a place which - is both unaddressible and contains invalid values, and, as a - result, you get not only an invalid-address (read/write) error, - but also a potentially large set of uninitialised-value errors, - one for every time the value is used. -

    - There is a hazy boundary case to do with multi-byte loads from - addresses which are partially valid and partially invalid. See - the description of the flag --partial-loads-ok for details. -


  • -
- -Valgrind intercepts calls to malloc, calloc, realloc, valloc, -memalign, free, new and delete. The behaviour you get is: - -
    - -
  • malloc/new: the returned memory is marked as addressible but not - having valid values. This means you have to write on it before - you can read it.

  • -

    - -

  • calloc: returned memory is marked both addressible and valid, - since calloc() clears the area to zero.

  • -

    - -

  • realloc: if the new size is larger than the old, the new section - is addressible but invalid, as with malloc.

  • -

    - -

  • If the new size is smaller, the dropped-off section is marked as - unaddressible. You may only pass to realloc a pointer - previously issued to you by malloc/calloc/new/realloc.

  • -

    - -

  • free/delete: you may only pass to free a pointer previously - issued to you by malloc/calloc/new/realloc, or the value - NULL. Otherwise, Valgrind complains. If the pointer is indeed - valid, Valgrind marks the entire area it points at as - unaddressible, and places the block in the freed-blocks-queue. - The aim is to defer as long as possible reallocation of this - block. Until that happens, all attempts to access it will - elicit an invalid-address error, as you would hope.

  • -
- - - - -

3.4  Signals

- -Valgrind provides suitable handling of signals, so, provided you stick -to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() -are handled. Signal handlers may return in the normal way or do -longjmp(); both should work ok. As specified by POSIX, a signal is -blocked in its own handler. Default actions for signals should work -as before. Etc, etc. - -

Under the hood, dealing with signals is a real pain, and Valgrind's -simulation leaves much to be desired. If your program does -way-strange stuff with signals, bad things may happen. If so, let me -know. I don't promise to fix it, but I'd at least like to be aware of -it. - - - -

3.5  Memory leak detection

- -Valgrind keeps track of all memory blocks issued in response to calls -to malloc/calloc/realloc/new. So when the program exits, it knows -which blocks are still outstanding -- have not been returned, in other -words. Ideally, you want your program to have no blocks still in use -at exit. But many programs do. - -

For each such block, Valgrind scans the entire address space of the -process, looking for pointers to the block. One of three situations -may result: - -

    -
  • A pointer to the start of the block is found. This usually - indicates programming sloppiness; since the block is still - pointed at, the programmer could, at least in principle, have free'd - it before program exit.

  • -

    - -

  • A pointer to the interior of the block is found. The pointer - might originally have pointed to the start and have been moved - along, or it might be entirely unrelated. Valgrind deems such a - block as "dubious", that is, possibly leaked, - because it's unclear whether or - not a pointer to it still exists.

  • -

    - -

  • The worst outcome is that no pointer to the block can be found. - The block is classified as "leaked", because the - programmer could not possibly have free'd it at program exit, - since no pointer to it exists. This might be a symptom of - having lost the pointer at some earlier point in the - program.
  • -
- -Valgrind reports summaries about leaked and dubious blocks. -For each such block, it will also tell you where the block was -allocated. This should help you figure out why the pointer to it has -been lost. In general, you should attempt to ensure your programs do -not have any leaked or dubious blocks at exit. - -

The precise area of memory in which Valgrind searches for pointers -is: all naturally-aligned 4-byte words for which all A bits indicate -addressibility and all V bits indicate that the stored value is -actually valid. - -


- - - -

4  Limitations

- -The following list of limitations seems depressingly long. However, -most programs actually work fine. - -

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on -a kernel 2.2.X or 2.4.X system, subject to the following constraints: - -

    -
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator - encounters these, Valgrind will simply give up. It may be - possible to add support for them at a later time. Intel added a - few instructions such as "cmov" to the integer instruction set - on Pentium and later processors, and these are supported. - Nevertheless it's safest to think of Valgrind as implementing - the 486 instruction set.

  • -

    - -

  • Pthreads support is improving, but there are still significant - limitations in that department. See the section above on - Pthreads. Note that your program must be dynamically linked - against libpthread.so, so that Valgrind can - substitute its own implementation at program startup time. If - you're statically linked against it, things will fail - badly.

  • -

    - -

  • Valgrind assumes that the floating point registers are not used - as intermediaries in memory-to-memory copies, so it immediately - checks V bits in floating-point loads/stores. If you want to - write code which copies around possibly-uninitialised values, - you must ensure these travel through the integer registers, not - the FPU.

  • -

    - -

  • If your program does its own memory management, rather than - using malloc/new/free/delete, it should still work, but - Valgrind's error checking won't be so effective.

  • -

    - -

  • Valgrind's signal simulation is not as robust as it could be. - Basic POSIX-compliant sigaction and sigprocmask functionality is - supplied, but it's conceivable that things could go badly awry - if you do weird things with signals. Workaround: don't. - Programs that do non-POSIX signal tricks are in any case - inherently unportable, so should be avoided if - possible.

  • -

    - -

  • Programs which switch stacks are not well handled. Valgrind - does have support for this, but I don't have great faith in it. - It's difficult -- there's no cast-iron way to decide whether a - large change in %esp is as a result of the program switching - stacks, or merely allocating a large object temporarily on the - current stack -- yet Valgrind needs to handle the two situations - differently. 1 May 02: this probably interacts badly with the - new pthread support. I haven't checked properly.

  • -

    - -

  • x86 instructions, and system calls, have been implemented on - demand. So it's possible, although unlikely, that a program - will fall over with a message to that effect. If this happens, - please mail me ALL the details printed out, so I can try and - implement the missing feature.

  • -

    - -

  • x86 floating point works correctly, but floating-point code may - run even more slowly than integer code, due to my simplistic - approach to FPU emulation.

  • -

    - -

  • You can't Valgrind-ize statically linked binaries. Valgrind - relies on the dynamic-link mechanism to gain control at - startup.

  • -

    - -

  • Memory consumption of your program is majorly increased whilst - running under Valgrind. This is due to the large amount of - administrative information maintained behind the scenes. Another - cause is that Valgrind dynamically translates the original - executable. Translated, instrumented code is 14-16 times larger - than the original (!) so you can easily end up with 30+ MB of - translations when running (eg) a web browser. -
  • -
- -Programs which are known not to work are: - -
    -
  • emacs starts up but immediately concludes it is out of memory - and aborts. Emacs has its own memory-management scheme, but I - don't understand why this should interact so badly with - Valgrind. Emacs works fine if you build it to use the standard - malloc/free routines.

  • -

    -

- -Known platform-specific limitations, as of release 1.0.0: - -
    -
  • On Red Hat 7.3, there have been reports of link errors (at - program start time) for threaded programs using - __pthread_clock_gettime and - __pthread_clock_settime. This appears to be due to - /lib/librt-2.2.5.so needing them. Unfortunately I - do not understand enough about this problem to fix it properly, - and I can't reproduce it on my test RedHat 7.3 system. Please - mail me if you have more information / understanding.

  • -

    -

  • - 1.0.0 now partially works on Red Hat 7.3.92 ("Limbo" - public beta). However, don't expect a smooth ride. - Basically valgrind won't work as-is with any - glibc-2.3 based system. Limbo is just a little pre glibc-2.3 - and it just about works. Limbo is also gcc-3.1 based and so - suffers from the problems in the following point.

  • -

    -

  • - Inlining of string functions with gcc-3.1 or above causes a - large number of false reports of uninitialised value uses. I - know what the problem is and roughly how to fix it, but I need - to devise a reasonably efficient fix. Try to reduce the - optimisation level, or use -fno-builtin-strlen in - the meantime. Or use an earlier gcc.

  • -

    -

- - -


- - - -

5  How it works -- a rough overview

-Some gory details, for those with a passion for gory details. You -don't need to read this section if all you want to do is use Valgrind. - - -

5.1  Getting started

- -Valgrind is compiled into a shared object, valgrind.so. The shell -script valgrind sets the LD_PRELOAD environment variable to point to -valgrind.so. This causes the .so to be loaded as an extra library to -any subsequently executed dynamically-linked ELF binary, viz, the -program you want to debug. - -

The dynamic linker allows each .so in the process image to have an -initialisation function which is run before main(). It also allows -each .so to have a finalisation function run after main() exits. - -

When valgrind.so's initialisation function is called by the dynamic -linker, the synthetic CPU starts up. The real CPU remains locked -in valgrind.so for the entire rest of the program, but the synthetic -CPU returns from the initialisation function. Startup of the program -now continues as usual -- the dynamic linker calls all the other .so's -initialisation routines, and eventually runs main(). This all runs on -the synthetic CPU, not the real one, but the client program cannot -tell the difference. - -

Eventually main() exits, so the synthetic CPU calls valgrind.so's -finalisation function. Valgrind detects this, and uses it as its cue -to exit. It prints summaries of all errors detected, possibly checks -for memory leaks, and then exits the finalisation routine, but now on -the real CPU. The synthetic CPU has now lost control -- permanently --- so the program exits back to the OS on the real CPU, just as it -would have done anyway. - -

On entry, Valgrind switches stacks, so it runs on its own stack. -On exit, it switches back. This means that the client program -continues to run on its own stack, so we can switch back and forth -between running it on the simulated and real CPUs without difficulty. -This was an important design decision, because it makes it easy (well, -significantly less difficult) to debug the synthetic CPU. - - - -

5.2  The translation/instrumentation engine

- -Valgrind does not directly run any of the original program's code. Only -instrumented translations are run. Valgrind maintains a translation -table, which allows it to find the translation quickly for any branch -target (code address). If no translation has yet been made, the -translator - a just-in-time translator - is summoned. This makes an -instrumented translation, which is added to the collection of -translations. Subsequent jumps to that address will use this -translation. - -

Valgrind no longer directly supports detection of self-modifying -code. Such checking is expensive, and in practice (fortunately) -almost no applications need it. However, to help people who are -debugging dynamic code generation systems, there is a Client Request -(basically a macro you can put in your program) which directs Valgrind -to discard translations in a given address range. So Valgrind can -still work in this situation provided the client tells it when -code has become out-of-date and needs to be retranslated. - -

The JITter translates basic blocks -- blocks of straight-line-code --- as single entities. To minimise the considerable difficulties of -dealing with the x86 instruction set, x86 instructions are first -translated to a RISC-like intermediate code, similar to sparc code, -but with an infinite number of virtual integer registers. Initially -each insn is translated separately, and there is no attempt at -instrumentation. - -

The intermediate code is improved, mostly so as to try and cache -the simulated machine's registers in the real machine's registers over -several simulated instructions. This is often very effective. Also, -we try to remove redundant updates of the simulated machine's -condition-code register. - -

The intermediate code is then instrumented, giving more -intermediate code. There are a few extra intermediate-code operations -to support instrumentation; it is all refreshingly simple. After -instrumentation there is a cleanup pass to remove redundant value -checks. - -

This gives instrumented intermediate code which mentions arbitrary -numbers of virtual registers. A linear-scan register allocator is -used to assign real registers and possibly generate spill code. All -of this is still phrased in terms of the intermediate code. This -machinery is inspired by the work of Reuben Thomas (MITE). - -

Then, and only then, is the final x86 code emitted. The -intermediate code is carefully designed so that x86 code can be -generated from it without need for spare registers or other -inconveniences. - -

The translations are managed using a traditional LRU-based caching -scheme. The translation cache has a default size of about 14MB. - - - -

5.3  Tracking the status of memory

Each byte in the -process' address space has nine bits associated with it: one A bit and -eight V bits. The A and V bits for each byte are stored using a -sparse array, which flexibly and efficiently covers arbitrary parts of -the 32-bit address space without imposing significant space or -performance overheads for the parts of the address space never -visited. The scheme used, and speedup hacks, are described in detail -at the top of the source file vg_memory.c, so you should read that for -the gory details. - - - -

5.4 System calls

-All system calls are intercepted. The memory status map is consulted -before and updated after each call. It's all rather tiresome. See -vg_syscall_mem.c for details. - - - -

5.5  Signals

-All system calls to sigaction() and sigprocmask() are intercepted. If -the client program is trying to set a signal handler, Valgrind makes a -note of the handler address and which signal it is for. Valgrind then -arranges for the same signal to be delivered to its own handler. - -

When such a signal arrives, Valgrind's own handler catches it, and -notes the fact. At a convenient safe point in execution, Valgrind -builds a signal delivery frame on the client's stack and runs its -handler. If the handler longjmp()s, there is nothing more to be said. -If the handler returns, Valgrind notices this, zaps the delivery -frame, and carries on where it left off before delivering the signal. - -

The purpose of this nonsense is that setting signal handlers -essentially amounts to giving callback addresses to the Linux kernel. -We can't allow this to happen, because if it did, signal handlers -would run on the real CPU, not the simulated one. This means the -checking machinery would not operate during the handler run, and, -worse, memory permissions maps would not be updated, which could cause -spurious error reports once the handler had returned. - -

An even worse thing would happen if the signal handler longjmp'd -rather than returned: Valgrind would completely lose control of the -client program. - -

Upshot: we can't allow the client to install signal handlers -directly. Instead, Valgrind must catch, on behalf of the client, any -signal the client asks to catch, and must deliver it to the client on -the simulated CPU, not the real one. This involves considerable -gruesome fakery; see vg_signals.c for details. -

- -


- - -

6  Example

-This is the log for a run of a small program. The program is in fact -correct, and the reported error is the result of a potentially serious -code generation bug in GNU g++ (snapshot 20010527). -
-sewardj@phoenix:~/newmat10$
-~/Valgrind-6/valgrind -v ./bogon 
-==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
-==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
-==25832== Startup, with flags:
-==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
-==25832== reading syms from /lib/ld-linux.so.2
-==25832== reading syms from /lib/libc.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
-==25832== reading syms from /lib/libm.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
-==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
-==25832== reading syms from /proc/self/exe
-==25832== loaded 5950 symbols, 142333 line number locations
-==25832== 
-==25832== Invalid read of size 4
-==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
-==25832==    by 0x80487AF: main (bogon.cpp:66)
-==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-==25832==
-==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
-==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
-==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
-==25832== For a detailed leak analysis, rerun with: --leak-check=yes
-==25832==
-==25832== exiting, did 1881 basic blocks, 0 misses.
-==25832== 223 translations, 3626 bytes in, 56801 bytes out.
-
-

The GCC folks fixed this about a week before gcc-3.0 shipped. -


-

- - - - -

7  Cache profiling

-As well as memory debugging, Valgrind also allows you to do cache simulations -and annotate your source line-by-line with the number of cache misses. In -particular, it records: -
    -
  • L1 instruction cache reads and misses; -
  • L1 data cache reads and read misses, writes and write misses; -
  • L2 unified cache reads and read misses, writes and write misses. -
-On a modern x86 machine, an L1 miss will typically cost around 10 cycles, -and an L2 miss can cost as much as 200 cycles. Detailed cache profiling can be -very useful for improving the performance of your program.

- -Also, since one instruction cache read is performed per instruction executed, -you can find out how many instructions are executed per line, which can be -useful for traditional profiling and test coverage.

- -Any feedback, bug-fixes, suggestions, etc, welcome. - - -

7.1  Overview

-First off, as for normal Valgrind use, you probably want to turn on debugging -info (the -g flag). But by contrast with normal Valgrind use, you -probably do want to turn optimisation on, since you should profile your -program as it will be normally run. - -The two steps are: -
    -
  1. Run your program with cachegrind in front of the - normal command line invocation. When the program finishes, - Valgrind will print summary cache statistics. It also collects - line-by-line information in a file cachegrind.out. -

    - This step should be done every time you want to collect - information about a new program, a changed program, or about the - same program with different input. -

  2. -

    -

  3. Generate a function-by-function summary, and possibly annotate - source files with 'vg_annotate'. Source files to annotate can be - specified manually on the command line, or - "interesting" source files can be annotated automatically with - the --auto=yes option. You can annotate C/C++ - files or assembly language files equally easily. -

    - This step can be performed as many times as you like for each - Step 1. You may want to do multiple annotations showing - different information each time.

    -

  4. -
- -The steps are described in detail in the following sections.

- - -

7.2  Cache simulation specifics

- -Cachegrind uses a simulation for a machine with a split L1 cache and a unified -L2 cache. This configuration is used for all (modern) x86-based machines we -are aware of. Old Cyrix CPUs had a unified I and D L1 cache, but they are -ancient history now.

- -The more specific characteristics of the simulation are as follows. - -

    -
  • Write-allocate: when a write miss occurs, the block written to - is brought into the D1 cache. Most modern caches have this - property.
  • - -

  • Bit-selection hash function: the line(s) in the cache to which a - memory block maps is chosen by the middle bits M--(M+N-1) of the - byte address, where: -
      -
    •  line size = 2^M bytes 
    • -
    • (cache size / line size) = 2^N bytes
    • -
  • - -

  • Inclusive L2 cache: the L2 cache replicates all the entries of - the L1 cache. This is standard on Pentium chips, but AMD - Athlons use an exclusive L2 cache that only holds blocks evicted - from L1. Ditto AMD Durons and most modern VIAs.
  • -

- -The cache configuration simulated (cache size, associativity and line size) is -determined automagically using the CPUID instruction. If you have an old -machine that (a) doesn't support the CPUID instruction, or (b) supports it in -an early incarnation that doesn't give any cache information, then Cachegrind -will fall back to using a default configuration (that of a model 3/4 Athlon). -Cachegrind will tell you if this happens. You can manually specify one, two or -all three levels (I1/D1/L2) of the cache from the command line using the ---I1, --D1 and --L2 options.

- -Other noteworthy behaviour: - -

    -
  • References that straddle two cache lines are treated as follows: -
      -
    • If both blocks hit --> counted as one hit
    • -
    • If one block hits, the other misses --> counted as one miss
    • -
    • If both blocks miss --> counted as one miss (not two)
    • -

  • - -
  • Instructions that modify a memory location (eg. inc and - dec) are counted as doing just a read, ie. a single data - reference. This may seem strange, but since the write can never cause a - miss (the read guarantees the block is in the cache) it's not very - interesting.

    - - Thus it measures not the number of times the data cache is accessed, but - the number of times a data cache miss could occur.

    -

  • -
- -If you are interested in simulating a cache with different properties, it is -not particularly hard to write your own cache simulator, or to modify the -existing ones in vg_cachesim_I1.c, vg_cachesim_D1.c, -vg_cachesim_L2.c and vg_cachesim_gen.c. We'd be -interested to hear from anyone who does. - - -

7.3  Profiling programs

- -Cache profiling is enabled by using the --cachesim=yes -option to the valgrind shell script. Alternatively, it -is probably more convenient to use the cachegrind script. -Either way automatically turns off Valgrind's memory checking functions, -since the cache simulation is slow enough already, and you probably -don't want to do both at once. -

-To gather cache profiling information about the program ls --l, type: - -

cachegrind ls -l
- -The program will execute (slowly). Upon completion, summary statistics -that look like this will be printed: - -
-==31751== I   refs:      27,742,716
-==31751== I1  misses:           276
-==31751== L2  misses:           275
-==31751== I1  miss rate:        0.0%
-==31751== L2i miss rate:        0.0%
-==31751== 
-==31751== D   refs:      15,430,290  (10,955,517 rd + 4,474,773 wr)
-==31751== D1  misses:        41,185  (    21,905 rd +    19,280 wr)
-==31751== L2  misses:        23,085  (     3,987 rd +    19,098 wr)
-==31751== D1  miss rate:        0.2% (       0.1%   +       0.4%)
-==31751== L2d miss rate:        0.1% (       0.0%   +       0.4%)
-==31751== 
-==31751== L2 misses:         23,360  (     4,262 rd +    19,098 wr)
-==31751== L2 miss rate:         0.0% (       0.0%   +       0.4%)
-
- -Cache accesses for instruction fetches are summarised first, giving the -number of fetches made (this is the number of instructions executed, which -can be useful to know in its own right), the number of I1 misses, and the -number of L2 instruction (L2i) misses.

- -Cache accesses for data follow. The information is similar to that of the -instruction fetches, except that the values are also shown split between reads -and writes (note each row's rd and wr values add up -to the row's total).

- -Combined instruction and data figures for the L2 cache follow that.

- - -

7.4  Output file

- -As well as printing summary information, Cachegrind also writes -line-by-line cache profiling information to a file named -cachegrind.out. This file is human-readable, but is best -interpreted by the accompanying program vg_annotate, -described in the next section. -

-Things to note about the cachegrind.out file: -

    -
  • It is written every time valgrind --cachesim=yes or - cachegrind is run, and will overwrite any existing - cachegrind.out in the current directory.
  • -

    -

  • It can be huge: ls -l generates a file of about - 350KB. Browsing a few files and web pages with a Konqueror - built with full debugging information generates a file - of around 15 MB.
  • -
- - -

7.5  Cachegrind options

-Cachegrind accepts all the options that Valgrind does, although some of them -(ones related to memory checking) don't do anything when cache profiling.

- -The interesting cache-simulation specific options are: - -

    -
  • --I1=<size>,<associativity>,<line_size>
    - --D1=<size>,<associativity>,<line_size>
    - --L2=<size>,<associativity>,<line_size>

    - [default: uses CPUID for automagic cache configuration]

    - - Manually specifies the I1/D1/L2 cache configuration, where - size and line_size are measured in bytes. The - three items must be comma-separated, but with no spaces, eg: - -

    cachegrind --I1=65536,2,64
    - - You can specify one, two or three of the I1/D1/L2 caches. Any level not - manually specified will be simulated using the configuration found in the - normal way (via the CPUID instruction, or failing that, via defaults). -
- - - -

7.6  Annotating C/C++ programs

- -Before using vg_annotate, it is worth widening your -window to be at least 120-characters wide if possible, as the output -lines can be quite long. -

-To get a function-by-function summary, run vg_annotate in a -directory containing a cachegrind.out file. The output -looks like this: - -

---------------------------------------------------------------------------------
-I1 cache:              65536 B, 64 B, 2-way associative
-D1 cache:              65536 B, 64 B, 2-way associative
-L2 cache:              262144 B, 64 B, 8-way associative
-Command:               concord vg_to_ucode.c
-Events recorded:       Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Events shown:          Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Event sort order:      Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Threshold:             99%
-Chosen for annotation:
-Auto-annotation:       off
-
---------------------------------------------------------------------------------
-Ir         I1mr I2mr Dr         D1mr   D2mr  Dw        D1mw   D2mw
---------------------------------------------------------------------------------
-27,742,716  276  275 10,955,517 21,905 3,987 4,474,773 19,280 19,098  PROGRAM TOTALS
-
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr        D1mr  D2mr  Dw        D1mw   D2mw    file:function
---------------------------------------------------------------------------------
-8,821,482    5    5 2,242,702 1,621    73 1,794,230      0      0  getc.c:_IO_getc
-5,222,023    4    4 2,276,334    16    12   875,959      1      1  concord.c:get_word
-2,649,248    2    2 1,344,810 7,326 1,385         .      .      .  vg_main.c:strcmp
-2,521,927    2    2   591,215     0     0   179,398      0      0  concord.c:hash
-2,242,740    2    2 1,046,612   568    22   448,548      0      0  ctype.c:tolower
-1,496,937    4    4   630,874 9,000 1,400   279,388      0      0  concord.c:insert
-  897,991   51   51   897,831    95    30        62      1      1  ???:???
-  598,068    1    1   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__flockfile
-  598,068    0    0   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__funlockfile
-  598,024    4    4   213,580    35    16   149,506      0      0  vg_clientmalloc.c:malloc
-  446,587    1    1   215,973 2,167   430   129,948 14,057 13,957  concord.c:add_existing
-  341,760    2    2   128,160     0     0   128,160      0      0  vg_clientmalloc.c:vg_trap_here_WRAPPER
-  320,782    4    4   150,711   276     0    56,027     53     53  concord.c:init_hash_table
-  298,998    1    1   106,785     0     0    64,071      1      1  concord.c:create
-  149,518    0    0   149,516     0     0         1      0      0  ???:tolower@@GLIBC_2.0
-  149,518    0    0   149,516     0     0         1      0      0  ???:fgetc@@GLIBC_2.0
-   95,983    4    4    38,031     0     0    34,409  3,152  3,150  concord.c:new_word_node
-   85,440    0    0    42,720     0     0    21,360      0      0  vg_clientmalloc.c:vg_bogus_epilogue
-
- -First up is a summary of the annotation options: - -
    -
  • I1 cache, D1 cache, L2 cache: cache configuration. So you know the - configuration with which these results were obtained.
  • - -

  • Command: the command line invocation of the program under - examination.
  • - -

  • Events recorded: event abbreviations are:

    -

      -
    • Ir : I cache reads (ie. instructions executed)
    • -
    • I1mr: I1 cache read misses
    • -
    • I2mr: L2 cache instruction read misses
    • -
    • Dr : D cache reads (ie. memory reads)
    • -
    • D1mr: D1 cache read misses
    • -
    • D2mr: L2 cache data read misses
    • -
    • Dw : D cache writes (ie. memory writes)
    • -
    • D1mw: D1 cache write misses
    • -
    • D2mw: L2 cache data write misses
    • -

    - Note that D1 total misses is given by D1mr + - D1mw, and that L2 total misses is given by - I2mr + D2mr + D2mw.

  • - -

  • Events shown: the events shown (a subset of events gathered). This can - be adjusted with the --show option.
  • - -

  • Event sort order: the sort order in which functions are shown. For - example, in this case the functions are sorted from highest - Ir counts to lowest. If two functions have identical - Ir counts, they will then be sorted by I1mr - counts, and so on. This order can be adjusted with the - --sort option.

    - - Note that this dictates the order the functions appear. It is not - the order in which the columns appear; that is dictated by the "events - shown" line (and can be changed with the --show option). -

  • - -

  • Threshold: vg_annotate by default omits functions - that cause very low numbers of misses to avoid drowning you in - information. In this case, vg_annotate shows summaries of the - functions that account for 99% of the Ir counts; - Ir is chosen as the threshold event since it is the - primary sort event. The threshold can be adjusted with the - --threshold option.
  • - -

  • Chosen for annotation: names of files specified manually for annotation; - in this case none.
  • - -

  • Auto-annotation: whether auto-annotation was requested via the - --auto=yes option. In this case no.
  • -

- -Then follow summary statistics for the whole program. These are similar -to the summary provided when running cachegrind.

- -Then follow function-by-function statistics. Each function is -identified by a file_name:function_name pair. If a column -contains only a dot it means the function never performs -that event (eg. the third row shows that strcmp() -contains no instructions that write to memory). The name -??? is used if the file name and/or function name -could not be determined from debugging information. If most of the -entries have the form ???:??? the program probably wasn't -compiled with -g. If any code was invalidated (either due to -self-modifying code or unloading of shared objects) its counts are aggregated -into a single cost centre written as (discarded):(discarded).

- -It is worth noting that functions will come from three types of source files: -

    -
  1. From the profiled program (concord.c in this example).
  2. -
  3. From libraries (eg. getc.c)
  4. -
  5. From Valgrind's implementation of some libc functions (eg. - vg_clientmalloc.c:malloc). These are recognisable because - the filename begins with vg_, and is probably one of - vg_main.c, vg_clientmalloc.c or - vg_mylibc.c. -
  6. -
- -There are two ways to annotate source files -- by choosing them -manually, or with the --auto=yes option. To do it -manually, just specify the filenames as arguments to -vg_annotate. For example, the output from running -vg_annotate concord.c for our example produces the same -output as above followed by an annotated version of -concord.c, a section of which looks like: - -
---------------------------------------------------------------------------------
--- User-annotated source: concord.c
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr      D1mr  D2mr  Dw      D1mw   D2mw
-
-[snip]
-
-        .    .    .       .     .     .       .      .      .  void init_hash_table(char *file_name, Word_Node *table[])
-        3    1    1       .     .     .       1      0      0  {
-        .    .    .       .     .     .       .      .      .      FILE *file_ptr;
-        .    .    .       .     .     .       .      .      .      Word_Info *data;
-        1    0    0       .     .     .       1      1      1      int line = 1, i;
-        .    .    .       .     .     .       .      .      .
-        5    0    0       .     .     .       3      0      0      data = (Word_Info *) create(sizeof(Word_Info));
-        .    .    .       .     .     .       .      .      .
-    4,991    0    0   1,995     0     0     998      0      0      for (i = 0; i < TABLE_SIZE; i++)
-    3,988    1    1   1,994     0     0     997     53     52          table[i] = NULL;
-        .    .    .       .     .     .       .      .      .
-        .    .    .       .     .     .       .      .      .      /* Open file, check it. */
-        6    0    0       1     0     0       4      0      0      file_ptr = fopen(file_name, "r");
-        2    0    0       1     0     0       .      .      .      if (!(file_ptr)) {
-        .    .    .       .     .     .       .      .      .          fprintf(stderr, "Couldn't open '%s'.\n", file_name);
-        1    1    1       .     .     .       .      .      .          exit(EXIT_FAILURE);
-        .    .    .       .     .     .       .      .      .      }
-        .    .    .       .     .     .       .      .      .
-  165,062    1    1  73,360     0     0  91,700      0      0      while ((line = get_word(data, line, file_ptr)) != EOF)
-  146,712    0    0  73,356     0     0  73,356      0      0          insert(data->word, data->line, table);
-        .    .    .       .     .     .       .      .      .
-        4    0    0       1     0     0       2      0      0      free(data);
-        4    0    0       1     0     0       2      0      0      fclose(file_ptr);
-        3    0    0       2     0     0       .      .      .  }
-
- -(Although column widths are automatically minimised, a wide terminal is clearly -useful.)

- -Each source file is clearly marked (User-annotated source) as -having been chosen manually for annotation. If the file was found in one of -the directories specified with the -I/--include -option, the directory and file are both given.

- -Each line is annotated with its event counts. Events not applicable for a line -are represented by a `.'; this is useful for distinguishing between an event -which cannot happen, and one which can but did not.

- -Sometimes only a small section of a source file is executed. To minimise -uninteresting output, Valgrind only shows annotated lines and lines within a -small distance of annotated lines. Gaps are marked with the line numbers so -you know which part of a file the shown code comes from, eg: - -

-(figures and code for line 704)
--- line 704 ----------------------------------------
--- line 878 ----------------------------------------
-(figures and code for line 878)
-
- -The amount of context to show around annotated lines is controlled by the ---context option.

- -To get automatic annotation, run vg_annotate --auto=yes. -vg_annotate will automatically annotate every source file it can find that is -mentioned in the function-by-function summary. Therefore, the files chosen for -auto-annotation are affected by the --sort and ---threshold options. Each source file is clearly marked -(Auto-annotated source) as being chosen automatically. Any files -that could not be found are mentioned at the end of the output, eg: - -

---------------------------------------------------------------------------------
-The following files chosen for auto-annotation could not be found:
---------------------------------------------------------------------------------
-  getc.c
-  ctype.c
-  ../sysdeps/generic/lockfile.c
-
- -This is quite common for library files, since libraries are usually compiled -with debugging information, but the source files are often not present on a -system. If a file is chosen for annotation both manually and -automatically, it is marked as User-annotated source. - -Use the -I/--include option to tell Valgrind where to look for -source files if the filenames found from the debugging information aren't -specific enough. - -Beware that vg_annotate can take some time to digest large -cachegrind.out files, eg. 30 seconds or more. Also beware that -auto-annotation can produce a lot of output if your program is large! - - -

7.7  Annotating assembler programs

- -Valgrind can annotate assembler programs too, or annotate the -assembler generated for your C program. Sometimes this is useful for -understanding what is really happening when an interesting line of C -code is translated into multiple instructions.

- -To do this, you just need to assemble your .s files with -assembler-level debug information. gcc doesn't do this, but you can -use the GNU assembler with the --gstabs option to -generate object files with this information, eg: - -

as --gstabs foo.s
- -You can then profile and annotate source files in the same way as for C/C++ -programs. - - -

7.8  vg_annotate options

-
    -
  • -h, --help
  • -

  • -v, --version

    - - Help and version, as usual.

  • - -
  • --sort=A,B,C [default: order in - cachegrind.out]

    - Specifies the events upon which the sorting of the function-by-function - entries will be based. Useful if you want to concentrate on eg. I cache - misses (--sort=I1mr,I2mr), or D cache misses - (--sort=D1mr,D2mr), or L2 misses - (--sort=D2mr,I2mr).

  • - -

  • --show=A,B,C [default: all, using order in - cachegrind.out]

    - Specifies which events to show (and the column order). Default is to use - all present in the cachegrind.out file (and use the order in - the file).

  • - -

  • --threshold=X [default: 99%]

    - Sets the threshold for the function-by-function summary. Functions are - shown that account for more than X% of the primary sort event. If - auto-annotating, also affects which files are annotated. - - Note: thresholds can be set for more than one of the events by appending - any events for the --sort option with a colon and a number - (no spaces, though). E.g. if you want to see the functions that cover - 99% of L2 read misses and 99% of L2 write misses, use this option: - -

    --sort=D2mr:99,D2mw:99
    -
  • - -

  • --auto=no [default]
    - --auto=yes

    - When enabled, automatically annotates every file that is mentioned in the - function-by-function summary that can be found. Also gives a list of - those that couldn't be found. - -

  • --context=N [default: 8]

    - Print N lines of context before and after each annotated line. Avoids - printing large sections of source files that were not executed. Use a - large number (eg. 10,000) to show all source lines. -

  • - -

  • -I=<dir>, --include=<dir> - [default: empty string]

    - Adds a directory to the list in which to search for files. Multiple - -I/--include options can be given to add multiple directories. -

- - -

7.9  Warnings

-There are a couple of situations in which vg_annotate issues warnings. - -
    -
  • If a source file is more recent than the cachegrind.out - file. This is because the information in cachegrind.out is - only recorded with line numbers, so if the line numbers change at all in - the source (eg. lines added, deleted, swapped), any annotations will be - incorrect.

    - -

  • If information is recorded about line numbers past the end of a file. - This can be caused by the above problem, ie. shortening the source file - while using an old cachegrind.out file. If this happens, - the figures for the bogus lines are printed anyway (clearly marked as - bogus) in case they are important.
  • -

- - -

7.10  Things to watch out for

-Some odd things that can occur during annotation: - -
    -
  • If annotating at the assembler level, you might see something like this: - -
    -      1    0    0  .    .    .  .    .    .          leal -12(%ebp),%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,84(%ebx)
    -      2    0    0  0    0    0  1    0    0          movl $1,-20(%ebp)
    -      .    .    .  .    .    .  .    .    .          .align 4,0x90
    -      1    0    0  .    .    .  .    .    .          movl $.LnrB,%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,-16(%ebp)
    -      
    - - How can the third instruction be executed twice when the others are - executed only once? As it turns out, it isn't. Here's a dump of the - executable, using objdump -d: - -
    -      8048f25:       8d 45 f4                lea    0xfffffff4(%ebp),%eax
    -      8048f28:       89 43 54                mov    %eax,0x54(%ebx)
    -      8048f2b:       c7 45 ec 01 00 00 00    movl   $0x1,0xffffffec(%ebp)
    -      8048f32:       89 f6                   mov    %esi,%esi
    -      8048f34:       b8 08 8b 07 08          mov    $0x8078b08,%eax
    -      8048f39:       89 45 f0                mov    %eax,0xfffffff0(%ebp)
    -      
    - - Notice the extra mov %esi,%esi instruction. Where did this - come from? The GNU assembler inserted it to serve as the two bytes of - padding needed to align the movl $.LnrB,%eax instruction on - a four-byte boundary, but pretended it didn't exist when adding debug - information. Thus when Valgrind reads the debug info it thinks that the - movl $0x1,0xffffffec(%ebp) instruction covers the address - range 0x8048f2b--0x8048f33 by itself, and attributes the counts for the - mov %esi,%esi to it.

    -

  • - -
  • Inlined functions can cause strange results in the function-by-function - summary. If a function inline_me() is defined in - foo.h and inlined in the functions f1(), - f2() and f3() in bar.c, there will - not be a foo.h:inline_me() function entry. Instead, there - will be separate function entries for each inlining site, ie. - foo.h:f1(), foo.h:f2() and - foo.h:f3(). To find the total counts for - foo.h:inline_me(), add up the counts from each entry.

    - - The reason for this is that although the debug info output by gcc - indicates the switch from bar.c to foo.h, it - doesn't indicate the name of the function in foo.h, so - Valgrind keeps using the old one.

    - -

  • Sometimes, the same filename might be represented with a relative name - and with an absolute name in different parts of the debug info, eg: - /home/user/proj/proj.h and ../proj.h. In this - case, if you use auto-annotation, the file will be annotated twice with - the counts split between the two.

    -

  • - -
  • Files with more than 65,535 lines cause difficulties for the stabs debug - info reader. This is because the line number in the struct - nlist defined in a.out.h under Linux is only a 16-bit - value. Valgrind can handle some files with more than 65,535 lines - correctly by making some guesses to identify line number overflows. But - some cases are beyond it, in which case you'll get a warning message - explaining that annotations for the file might be incorrect.

    -

  • - -
  • If you compile some files with -g and some without, some - events that take place in a file without debug info could be attributed - to the last line of a file with debug info (whichever one gets placed - before the non-debug-info file in the executable).

    -

  • -
- -This list looks long, but these cases should be fairly rare.

- -Note: stabs is not an easy format to read. If you come across bizarre -annotations that look like they might be caused by a bug in the stabs reader, -please let us know.

- - -

7.11  Accuracy

-Valgrind's cache profiling has a number of shortcomings: - -
    -
  • It doesn't account for kernel activity -- the effect of system calls on - the cache contents is ignored.
  • - -

  • It doesn't account for other process activity (although this is probably - desirable when considering a single program).
  • - -

  • It doesn't account for virtual-to-physical address mappings; hence the - entire simulation is not a true representation of what's happening in the - cache.
  • - -

  • It doesn't account for cache misses not visible at the instruction level, - eg. those arising from TLB misses, or speculative execution.
  • - -

  • Valgrind's custom malloc() will allocate memory in different - ways to the standard malloc(), which could warp the results. -
  • - -

  • Valgrind's custom threads implementation will schedule threads - differently to the standard one. This too could warp the results for - threaded programs. -
  • - -

  • The instructions bts, btr and btc - will incorrectly be counted as doing a data read if both the arguments - are registers, eg: - -
    btsl %eax, %edx
    - - This should only happen rarely. -
- -Another thing worth noting is that results are very sensitive. Changing the -size of the valgrind.so file, the size of the program being -profiled, or even the length of its name can perturb the results. Variations -will be small, but don't expect perfectly repeatable results if your program -changes at all.

- -While these factors mean you shouldn't trust the results to be super-accurate, -hopefully they should be close enough to be useful.

- - -

7.12  Todo

-
    -
  • Program start-up/shut-down calls a lot of functions that aren't - interesting and just complicate the output. Would be nice to exclude - these somehow.
  • -

    -

-
- - - diff --git a/cachegrind/docs/nav.html b/cachegrind/docs/nav.html deleted file mode 100644 index ad920ad443..0000000000 --- a/cachegrind/docs/nav.html +++ /dev/null @@ -1,72 +0,0 @@ - - - Valgrind - - - - - -
- Contents of this manual
- 1 Introduction
- 1.1 What Valgrind is for
- 1.2 What it does with - your program -

- 2 How to use it, and how to - make sense of the results
- 2.1 Getting started
- 2.2 The commentary
- 2.3 Reporting of errors
- 2.4 Suppressing errors
- 2.5 Command-line flags
- 2.6 Explanation of error messages
- 2.7 Writing suppressions files
- 2.8 The Client Request mechanism
- 2.9 Support for POSIX pthreads
- 2.10 Building and installing
- 2.11 If you have problems -

- 3 Details of the checking machinery
- 3.1 Valid-value (V) bits
- 3.2 Valid-address (A) bits
- 3.3 Putting it all together
- 3.4 Signals
- 3.5 Memory leak detection -

- 4 Limitations
-

- 5 How it works -- a rough overview
- 5.1 Getting started
- 5.2 The translation/instrumentation engine
- 5.3 Tracking the status of memory
- 5.4 System calls
- 5.5 Signals -

- 6 An example
-

- 7 Cache profiling -

- 8 The design and implementation of Valgrind
- - - diff --git a/cachegrind/docs/techdocs.html b/cachegrind/docs/techdocs.html deleted file mode 100644 index 2e1cc8b7e9..0000000000 --- a/cachegrind/docs/techdocs.html +++ /dev/null @@ -1,2524 +0,0 @@ - - - - The design and implementation of Valgrind - - - - -  -

The design and implementation of Valgrind

- -
-Detailed technical notes for hackers, maintainers and the -overly-curious
-These notes pertain to snapshot 20020306
-

-jseward@acm.org
-
http://developer.kde.org/~sewardj
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -x86 GNU/Linux executables. -

- -

- - - - -


- -

Introduction

- -This document contains a detailed, highly-technical description of the -internals of Valgrind. This is not the user manual; if you are an -end-user of Valgrind, you do not want to read this. Conversely, if -you really are a hacker-type and want to know how it works, I assume -that you have read the user manual thoroughly. -

-You may need to read this document several times, and carefully. Some -important things, I only say once. - - -

History

- -Valgrind came into public view in late Feb 2002. However, it has been -under contemplation for a very long time, perhaps seriously for about -five years. Somewhat over two years ago, I started working on the x86 -code generator for the Glasgow Haskell Compiler -(http://www.haskell.org/ghc), gaining familiarity with x86 internals -on the way. I then did Cacheprof (http://www.cacheprof.org), gaining -further x86 experience. Some time around Feb 2000 I started -experimenting with a user-space x86 interpreter for x86-Linux. This -worked, but it was clear that a JIT-based scheme would be necessary to -give reasonable performance for Valgrind. Design work for the JITter -started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 -dynamic translator which could run quite large programs. This -translator was in a sense pointless, since it did not do any -instrumentation or checking. - -

-Most of the rest of 2001 was taken up designing and implementing the -instrumentation scheme. The main difficulty, which consumed a lot -of effort, was to design a scheme which did not generate large numbers -of false uninitialised-value warnings. By late 2001 a satisfactory -scheme had been arrived at, and I started to test it on ever-larger -programs, with an eventual eye to making it work well enough so that -it was helpful to folks debugging the upcoming version 3 of KDE. I've -used KDE since before version 1.0, and wanted Valgrind to be an -indirect contribution to the KDE 3 development effort. At the start of -Feb 02 the kde-core-devel crew started using it, and gave a huge -amount of helpful feedback and patches in the space of three weeks. -Snapshot 20020306 is the result. - -

-In the best Unix tradition, or perhaps in the spirit of Fred Brooks' -depressing-but-completely-accurate epitaph "build one to throw away; -you will anyway", much of Valgrind is a second or third rendition of -the initial idea. The instrumentation machinery -(vg_translate.c, vg_memory.c) and core CPU -simulation (vg_to_ucode.c, vg_from_ucode.c) -have had three redesigns and rewrites; the register allocator, -low-level memory manager (vg_malloc2.c) and symbol table -reader (vg_symtab2.c) are on the second rewrite. In a -sense, this document serves to record some of the knowledge gained as -a result. - - -

Design overview

- -Valgrind is compiled into a Linux shared object, -valgrind.so, and also a dummy one, -valgrinq.so, of which more later. The -valgrind shell script adds valgrind.so to -the LD_PRELOAD list of extra libraries to be -loaded with any dynamically linked library. This is a standard trick, -one which I assume the LD_PRELOAD mechanism was developed -to support. - -

-valgrind.so -is linked with the -z initfirst flag, which requests that -its initialisation code is run before that of any other object in the -executable image. When this happens, valgrind gains control. The -real CPU becomes "trapped" in valgrind.so and the -translations it generates. The synthetic CPU provided by Valgrind -does, however, return from this initialisation function. So the -normal startup actions, orchestrated by the dynamic linker -ld.so, continue as usual, except on the synthetic CPU, -not the real one. Eventually main is run and returns, -and then the finalisation code of the shared objects is run, -presumably in inverse order to which they were initialised. Remember, -this is still all happening on the simulated CPU. Eventually -valgrind.so's own finalisation code is called. It spots -this event, shuts down the simulated CPU, prints any error summaries -and/or does leak detection, and returns from the initialisation code -on the real CPU. At this point, in effect the real and synthetic CPUs -have merged back into one, Valgrind has lost control of the program, -and the program finally exit()s back to the kernel in the -usual way. - -

-The normal course of activity, once Valgrind has started up, is as -follows. Valgrind never runs any part of your program (usually -referred to as the "client"), not a single byte of it, directly. -Instead it uses function VG_(translate) to translate -basic blocks (BBs, straight-line sequences of code) into instrumented -translations, and those are run instead. The translations are stored -in the translation cache (TC), vg_tc, with the -translation table (TT), vg_tt supplying the -original-to-translation code address mapping. Auxiliary array -VG_(tt_fast) is used as a direct-map cache for fast -lookups in TT; it usually achieves a hit rate of around 98% and -facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. - -

-Function VG_(dispatch) in vg_dispatch.S is -the heart of the JIT dispatcher. Once a translated code address has -been found, it is executed simply by an x86 call -to the translation. At the end of the translation, the next -original code addr is loaded into %eax, and the -translation then does a ret, taking it back to the -dispatch loop, with, interestingly, zero branch mispredictions. -The address requested in %eax is looked up first in -VG_(tt_fast), and, if not found, by calling C helper -VG_(search_transtab). If there is still no translation -available, VG_(dispatch) exits back to the top-level -C dispatcher VG_(toploop), which arranges for -VG_(translate) to make a new translation. All fairly -unsurprising, really. There are various complexities described below. - -

-The translator, orchestrated by VG_(translate), is -complicated but entirely self-contained. It is described in great -detail in subsequent sections. Translations are stored in TC, with TT -tracking administrative information. The translations are subject to -an approximate LRU-based management scheme. With the current -settings, the TC can hold at most about 15MB of translations, and LRU -passes prune it to about 13.5MB. Given that the -orig-to-translation expansion ratio is about 13:1 to 14:1, this means -TC holds translations for more or less a megabyte of original code, -which generally comes to about 70000 basic blocks for C++ compiled -with optimisation on. Generating new translations is expensive, so it -is worth having a large TC to minimise the (capacity) miss rate. - -

-The dispatcher, VG_(dispatch), receives hints from -the translations which allow it to cheaply spot all control -transfers corresponding to x86 call and ret -instructions. It has to do this in order to spot some special events: -

    -
  • Calls to VG_(shutdown). This is Valgrind's cue to - exit. NOTE: actually this is done a different way; it should be - cleaned up. -

    -

  • Returns of signal handlers, to the return address - VG_(signalreturn_bogusRA). The signal simulator - needs to know when a signal handler is returning, so we spot - jumps (returns) to this address. -

    -

  • Calls to vg_trap_here. All malloc, - free, etc calls that the client program makes are - eventually routed to a call to vg_trap_here, - and Valgrind does its own special thing with these calls. - In effect this provides a trapdoor, by which Valgrind can - intercept certain calls on the simulated CPU, run the call as it - sees fit itself (on the real CPU), and return the result to - the simulated CPU, quite transparently to the client program. -
-Valgrind intercepts the client's malloc, -free, etc, -calls, so that it can store additional information. Each block -malloc'd by the client gives rise to a shadow block -in which Valgrind stores the call stack at the time of the -malloc -call. When the client calls free, Valgrind tries to -find the shadow block corresponding to the address passed to -free, and emits an error message if none can be found. -If it is found, the block is placed on the freed blocks queue -vg_freed_list, it is marked as inaccessible, and -its shadow block now records the call stack at the time of the -free call. Keeping free'd blocks in -this queue allows Valgrind to spot all (presumably invalid) accesses -to them. However, once the volume of blocks in the free queue -exceeds VG_(clo_freelist_vol), blocks are finally -removed from the queue. - -

-Keeping track of A and V bits (note: if you don't know what these are, -you haven't read the user guide carefully enough) for memory is done -in vg_memory.c. This implements a sparse array structure -which covers the entire 4G address space in a way which is reasonably -fast and reasonably space efficient. The 4G address space is divided -up into 64K sections, each covering 64KB of address space. Given a -32-bit address, the top 16 bits are used to select one of the 65536 -entries in VG_(primary_map). The resulting "secondary" -(SecMap) holds A and V bits for the 64KB chunk of address -space corresponding to the lower 16 bits of the address. - - -

Design decisions

- -Some design decisions were motivated by the need to make Valgrind -debuggable. Imagine you are writing a CPU simulator. It works fairly -well. However, you run some large program, like Netscape, and after -tens of millions of instructions, it crashes. How can you figure out -where in your simulator the bug is? - -

-Valgrind's answer is: cheat. Valgrind is designed so that it is -possible to switch back to running the client program on the real -CPU at any point. Using the --stop-after= flag, you can -ask Valgrind to run just some number of basic blocks, and then -run the rest of the way on the real CPU. If you are searching for -a bug in the simulated CPU, you can use this to do a binary search, -which quickly leads you to the specific basic block which is -causing the problem. - -

-This is all very handy. It does constrain the design in certain -unimportant ways. Firstly, the layout of memory, when viewed from the -client's point of view, must be identical regardless of whether it is -running on the real or simulated CPU. This means that Valgrind can't -do pointer swizzling -- well, no great loss -- and it can't run on -the same stack as the client -- again, no great loss. -Valgrind operates on its own stack, VG_(stack), which -it switches to at startup, temporarily switching back to the client's -stack when doing system calls for the client. - -

-Valgrind also receives signals on its own stack, -VG_(sigstack), but for different gruesome reasons -discussed below. - -

-This nice clean switch-back-to-the-real-CPU-whenever-you-like story -is muddied by signals. Problem is that signals arrive at arbitrary -times and tend to slightly perturb the basic block count, with the -result that you can get close to the basic block causing a problem but -can't home in on it exactly. My kludgey hack is to define -SIGNAL_SIMULATION to 1 towards the bottom of -vg_syscall_mem.c, so that signal handlers are run on the -real CPU and don't change the BB counts. - -

-A second hole in the switch-back-to-real-CPU story is that Valgrind's -way of delivering signals to the client is different from that of the -kernel. Specifically, the layout of the signal delivery frame, and -the mechanism used to detect a sighandler returning, are different. -So you can't expect to make the transition inside a sighandler and -still have things working, but in practice that's not much of a -restriction. - -

-Valgrind's implementation of malloc, free, -etc, (in vg_clientmalloc.c, not the low-level stuff in -vg_malloc2.c) is somewhat complicated by the need to -handle switching back at arbitrary points. It does work tho. - - - -

Correctness

- -There's only one of me, and I have a Real Life (tm) as well as hacking -Valgrind [allegedly :-]. That means I don't have time to waste -chasing endless bugs in Valgrind. My emphasis is therefore on doing -everything as simply as possible, with correctness, stability and -robustness being the number one priority, more important than -performance or functionality. As a result: -
    -
  • The code is absolutely loaded with assertions, and these are - permanently enabled. I have no plan to remove or disable - them later. Over the past couple of months, as valgrind has - become more widely used, they have shown their worth, pulling - up various bugs which would otherwise have appeared as - hard-to-find segmentation faults. -

    - I am of the view that it's acceptable to spend 5% of the total - running time of your valgrindified program doing assertion checks - and other internal sanity checks. -

    -

  • Aside from the assertions, valgrind contains various sets of - internal sanity checks, which get run at varying frequencies - during normal operation. VG_(do_sanity_checks) - runs every 1000 basic blocks, which means 500 to 2000 times/second - for typical machines at present. It checks that Valgrind hasn't - overrun its private stack, and does some simple checks on the - memory permissions maps. Once every 25 calls it does some more - extensive checks on those maps. Etc, etc. -

    - The following components also have sanity check code, which can - be enabled to aid debugging: -

      -
    • The low-level memory-manager - (VG_(mallocSanityCheckArena)). This does a - complete check of all blocks and chains in an arena, which - is very slow. Is not engaged by default. -

      -

    • The symbol table reader(s): various checks to ensure - uniqueness of mappings; see VG_(read_symbols) - for a start. Is permanently engaged. -

      -

    • The A and V bit tracking stuff in vg_memory.c. - This can be compiled with cpp symbol - VG_DEBUG_MEMORY defined, which removes all the - fast, optimised cases, and uses simple-but-slow fallbacks - instead. Not engaged by default. -

      -

    • Ditto VG_DEBUG_LEAKCHECK. -

      -

    • The JITter parses x86 basic blocks into sequences of - UCode instructions. It then sanity checks each one with - VG_(saneUInstr) and sanity checks the sequence - as a whole with VG_(saneUCodeBlock). This stuff - is engaged by default, and has caught some way-obscure bugs - in the simulated CPU machinery in its time. -

      -

    • The system call wrapper does - VG_(first_and_last_secondaries_look_plausible) after - every syscall; this is known to pick up bugs in the syscall - wrappers. Engaged by default. -

      -

    • The main dispatch loop, in VG_(dispatch), checks - that translations do not set %ebp to any value - different from VG_EBP_DISPATCH_CHECKED or - & VG_(baseBlock). In effect this test is free, - and is permanently engaged. -

      -

    • There are a couple of ifdefed-out consistency checks I - inserted whilst debugging the new register allocator, - vg_do_register_allocation. -
    -

    -

  • I try to avoid techniques, algorithms, mechanisms, etc, for which - I can supply neither a convincing argument that they are correct, - nor sanity-check code which might pick up bugs in my - implementation. I don't always succeed in this, but I try. - Basically the idea is: avoid techniques which are, in practice, - unverifiable, in some sense. When doing anything, always have in - mind: "how can I verify that this is correct?" -
- -

-Some more specific things are: - -

    -
  • Valgrind runs in the same namespace as the client, at least from - ld.so's point of view, and it therefore absolutely - had better not export any symbol with a name which could clash - with that of the client or any of its libraries. Therefore, all - globally visible symbols exported from valgrind.so - are defined using the VG_ CPP macro. As you'll see - from vg_constants.h, this appends some arbitrary - prefix to the symbol, in order that it be, we hope, globally - unique. Currently the prefix is vgPlain_. For - convenience there are also VGM_, VGP_ - and VGOFF_. All locally defined symbols are declared - static and do not appear in the final shared object. -

    - To check this, I periodically do - nm valgrind.so | grep " T ", - which shows you all the globally exported text symbols. - They should all have an approved prefix, except for those like - malloc, free, etc, which we deliberately - want to shadow and take precedence over the same names exported - from glibc.so, so that valgrind can intercept those - calls easily. Similarly, nm valgrind.so | grep " D " - allows you to find any rogue data-segment symbol names. -

    -

  • Valgrind tries, and almost succeeds, in being completely - independent of all other shared objects, in particular of - glibc.so. For example, we have our own low-level - memory manager in vg_malloc2.c, which is a fairly - standard malloc/free scheme augmented with arenas, and - vg_mylibc.c exports reimplementations of various bits - and pieces you'd normally get from the C library. -

    - Why all the hassle? Because imagine the potential chaos of both - the simulated and real CPUs executing in glibc.so. - It just seems simpler and cleaner to be completely self-contained, - so that only the simulated CPU visits glibc.so. In - practice it's not much hassle anyway. Also, valgrind starts up - before glibc has a chance to initialise itself, and who knows what - difficulties that could lead to. Finally, glibc has definitions - for some types, specifically sigset_t, which conflict with - (are different from) the Linux kernel's idea of the same. When - Valgrind wants to fiddle around with signal stuff, it wants to - use the kernel's definitions, not glibc's definitions. So it's - simplest just to keep glibc out of the picture entirely. -

    - To find out which glibc symbols are used by Valgrind, reinstate - the link flags -nostdlib -Wl,-no-undefined. This - causes linking to fail, but will tell you what you depend on. - I have mostly, but not entirely, got rid of the glibc - dependencies; what remains is, IMO, fairly harmless. AFAIK the - current dependencies are: memset, - memcmp, stat, system, - sbrk, setjmp and longjmp. - -

    -

  • Similarly, valgrind should not really import any headers other - than the Linux kernel headers, since it knows of no API other than - the kernel interface to talk to. At the moment this is really not - in a good state, and vg_syscall_mem imports, via - vg_unsafe.h, a significant number of C-library - headers so as to know the sizes of various structs passed across - the kernel boundary. This is of course completely bogus, since - there is no guarantee that the C library's definitions of these - structs match those of the kernel. I have started to sort this - out using vg_kerneliface.h, into which I had intended - to copy all kernel definitions which valgrind could need, but this - has not gotten very far. At the moment it mostly contains - definitions for sigset_t and struct - sigaction, since the kernel's definition for these really - does clash with glibc's. I plan to use a vki_ prefix - on all these types and constants, to denote the fact that they - pertain to Valgrind's Kernel Interface. -

    - Another advantage of having a vg_kerneliface.h file - is that it makes it simpler to interface to a different kernel. - One can, for example, easily imagine writing a new - vg_kerneliface.h for FreeBSD, or x86 NetBSD. - -

- -

Current limitations

- -No threads. I think fixing this is close to a research-grade problem. -

-No MMX. Fixing this should be relatively easy, using the same giant -trick used for x86 FPU instructions. See below. -

-Support for weird (non-POSIX) signal stuff is patchy. Does anybody -care? -

- - - - -


- -

The instrumenting JITter

- -This really is the heart of the matter. We begin with various side -issues. - -

Run-time storage, and the use of host registers

- -Valgrind translates client (original) basic blocks into instrumented -basic blocks, which live in the translation cache TC, until either the -client finishes or the translations are ejected from TC to make room -for newer ones. -

-Since it generates x86 code in memory, Valgrind has complete control -of the use of registers in the translations. Now pay attention. I -shall say this only once, and it is important you understand this. In -what follows I will refer to registers in the host (real) cpu using -their standard names, %eax, %edi, etc. I -refer to registers in the simulated CPU by capitalising them: -%EAX, %EDI, etc. These two sets of -registers usually bear no direct relationship to each other; there is -no fixed mapping between them. This naming scheme is used fairly -consistently in the comments in the sources. -

-Host registers, once things are up and running, are used as follows: -

    -
  • %esp, the real stack pointer, points - somewhere in Valgrind's private stack area, - VG_(stack) or, transiently, into its signal delivery - stack, VG_(sigstack). -

    -

  • %edi is used as a temporary in code generation; it - is almost always dead, except when used for the Left - value-tag operations. -

    -

  • %eax, %ebx, %ecx, - %edx and %esi are available to - Valgrind's register allocator. They are dead (carry unimportant - values) in between translations, and are live only in - translations. The one exception to this is %eax, - which, as mentioned far above, has a special significance to the - dispatch loop VG_(dispatch): when a translation - returns to the dispatch loop, %eax is expected to - contain the original-code-address of the next translation to run. - The register allocator is so good at minimising spill code that - using five regs and not having to save/restore %edi - actually gives better code than allocating to %edi - as well, but then having to push/pop it around special uses. -

    -

  • %ebp points permanently at - VG_(baseBlock). Valgrind's translations are - position-independent, partly because this is convenient, but also - because translations get moved around in TC as part of the LRUing - activity. All static entities which need to be referred to - from generated code, whether data or helper functions, are stored - starting at VG_(baseBlock) and are therefore reached - by indexing from %ebp. There is but one exception, - which is that by placing the value - VG_EBP_DISPATCH_CHECKED - in %ebp just before a return to the dispatcher, - the dispatcher is informed that the next address to run, - in %eax, requires special treatment. -

    -

  • The real machine's FPU state is pretty much unimportant, for - reasons which will become obvious. Ditto its %eflags - register. -
- -

-The state of the simulated CPU is stored in memory, in -VG_(baseBlock), which is a block of 200 words IIRC. -Recall that %ebp points permanently at the start of this -block. Function vg_init_baseBlock decides what the -offsets of various entities in VG_(baseBlock) are to be, -and allocates word offsets for them. The code generator then emits -%ebp relative addresses to get at those things. The -sequence in which entities are allocated has been carefully chosen so -that the 32 most popular entities come first, because this means 8-bit -offsets can be used in the generated code. - -

-If I was clever, I could make %ebp point 32 words along -VG_(baseBlock), so that I'd have another 32 words of -short-form offsets available, but that's just complicated, and it's -not important -- the first 32 words take 99% (or whatever) of the -traffic. - -

-Currently, the sequence of stuff in VG_(baseBlock) is as -follows: -

    -
  • 9 words, holding the simulated integer registers, - %EAX .. %EDI, and the simulated flags, - %EFLAGS. -

    -

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. -

    -

  • The addresses of various helper routines called from - generated code: - VG_(helper_value_check4_fail), - VG_(helper_value_check0_fail), - which register V-check failures, - VG_(helperc_STOREV4), - VG_(helperc_STOREV1), - VG_(helperc_LOADV4), - VG_(helperc_LOADV1), - which do stores and loads of V bits to/from the - sparse array which keeps track of V bits in memory, - and - VGM_(handle_esp_assignment), which messes with - memory addressability resulting from changes in %ESP. -

    -

  • The simulated %EIP. -

    -

  • 24 spill words, for when the register allocator can't make it work - with 5 measly registers. -

    -

  • Addresses of helpers VG_(helperc_STOREV2), - VG_(helperc_LOADV2). These are here because 2-byte - loads and stores are relatively rare, so are placed above the - magic 32-word offset boundary. -

    -

  • For similar reasons, addresses of helper functions - VGM_(fpu_write_check) and - VGM_(fpu_read_check), which handle the A/V maps - testing and changes required by FPU writes/reads. -

    -

  • Some other boring helper addresses: - VG_(helper_value_check2_fail) and - VG_(helper_value_check1_fail). These are probably - never emitted now, and should be removed. -

    -

  • The entire state of the simulated FPU, which I believe to be - 108 bytes long. -

    -

  • Finally, the addresses of various other helper functions in - vg_helpers.S, which deal with rare situations which - are tedious or difficult to generate code in-line for. -
- -

-As a general rule, the simulated machine's state lives permanently in -memory at VG_(baseBlock). However, the JITter does some -optimisations which allow the simulated integer registers to be -cached in real registers over multiple simulated instructions within -the same basic block. These are always flushed back into memory at -the end of every basic block, so that the in-memory state is -up-to-date between basic blocks. (This flushing is implied by the -statement above that the real machine's allocatable registers are -dead in between simulated blocks). - - -

Startup, shutdown, and system calls

- -Getting into Valgrind (VG_(startup), called from -valgrind.so's initialisation section), really means -copying the real CPU's state into VG_(baseBlock), and -then installing our own stack pointer, etc, into the real CPU, and -then starting up the JITter. Exiting valgrind involves copying the -simulated state back to the real state. - -

-Unfortunately, there's a complication at startup time. Problem is -that at the point where we need to take a snapshot of the real CPU's -state, the offsets in VG_(baseBlock) are not set up yet, -because to do so would involve disrupting the real machine's state -significantly. The way round this is to dump the real machine's state -into a temporary, static block of memory, -VG_(m_state_static). We can then set up the -VG_(baseBlock) offsets at our leisure, and copy into it -from VG_(m_state_static) at some convenient later time. -This copying is done by -VG_(copy_m_state_static_to_baseBlock). - -

-On exit, the inverse transformation is (rather unnecessarily) used: -stuff in VG_(baseBlock) is copied to -VG_(m_state_static), and the assembly stub then copies -from VG_(m_state_static) into the real machine registers. - -

-Doing system calls on behalf of the client (vg_syscall.S) -is something of a half-way house. We have to make the world look -sufficiently like that which the client would normally have to make -the syscall actually work properly, but we can't afford to lose -control. So the trick is to copy all of the client's state, except -its program counter, into the real CPU, do the system call, and -copy the state back out. Note that the client's state includes its -stack pointer register, so one effect of this partial restoration is -to cause the system call to be run on the client's stack, as it should -be. - -

-As ever there are complications. We have to save some of our own state -somewhere when restoring the client's state into the CPU, so that we -can keep going sensibly afterwards. In fact the only thing which is -important is our own stack pointer, but for paranoia reasons I save -and restore our own FPU state as well, even though that's probably -pointless. - -

-The complication on the above complication is, that for horrible -reasons to do with signals, we may have to handle a second client -system call whilst the client is blocked inside some other system -call (unbelievable!). That means there's two sets of places to -dump Valgrind's stack pointer and FPU state across the syscall, -and we decide which to use by consulting -VG_(syscall_depth), which is in turn maintained by -VG_(wrap_syscall). - - - -

Introduction to UCode

- -UCode lies at the heart of the x86-to-x86 JITter. The basic premise -is that dealing with the x86 instruction set head-on is just too darn -complicated, so we do the traditional compiler-writer's trick and -translate it into a simpler, easier-to-deal-with form. - -

-In normal operation, translation proceeds through six stages, -coordinated by VG_(translate): -

    -
  1. Parsing of an x86 basic block into a sequence of UCode - instructions (VG_(disBB)). -

    -

  2. UCode optimisation (vg_improve), with the aim of - caching simulated registers in real registers over multiple - simulated instructions, and removing redundant simulated - %EFLAGS saving/restoring. -

    -

  3. UCode instrumentation (vg_instrument), which adds - value and address checking code. -

    -

  4. Post-instrumentation cleanup (vg_cleanup), removing - redundant value-check computations. -

    -

  5. Register allocation (vg_do_register_allocation), - which, note, is done on UCode. -

    -

  6. Emission of final instrumented x86 code - (VG_(emit_code)). -
- -

-Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode -transformation passes, all on straight-line blocks of UCode (type -UCodeBlock). Steps 2 and 4 are optimisation passes and -can be disabled for debugging purposes, with ---optimise=no and --cleanup=no respectively. - -

-Valgrind can also run in a no-instrumentation mode, given ---instrument=no. This is useful for debugging the JITter -quickly without having to deal with the complexity of the -instrumentation mechanism too. In this mode, steps 3 and 4 are -omitted. - -

-These flags combine, so that --instrument=no together with ---optimise=no means only steps 1, 5 and 6 are used. ---single-step=yes causes each x86 instruction to be -treated as a single basic block. The translations are terrible but -this is sometimes instructive. - -

-The --stop-after=N flag switches back to the real CPU -after N basic blocks. It also re-JITs the final basic -block executed and prints the debugging info resulting, so this -gives you a way to get a quick snapshot of how a basic block looks as -it passes through the six stages mentioned above. If you want to -see full information for every block translated (probably not, but -still ...) find, in VG_(translate), the lines -
dis = True; -
dis = debugging_translation; -
-and comment out the second line. This will spew out debugging -junk faster than you can possibly imagine. - - - -

UCode operand tags: type Tag

- -UCode is, more or less, a simple two-address RISC-like code. In -keeping with the x86 AT&T assembly syntax, generally speaking the -first operand is the source operand, and the second is the destination -operand, which is modified when the uinstr is notionally executed. - -

-UCode instructions have up to three operand fields, each of which has -a corresponding Tag describing it. Possible values for -the tag are: - -

    -
  • NoValue: indicates that the field is not in use. -

    -

  • Lit16: the field contains a 16-bit literal. -

    -

  • Literal: the field denotes a 32-bit literal, whose - value is stored in the lit32 field of the uinstr - itself. Since there is only one lit32 for the whole - uinstr, only one operand field may contain this tag. -

    -

  • SpillNo: the field contains a spill slot number, in - the range 0 to 23 inclusive, denoting one of the spill slots - contained inside VG_(baseBlock). Such tags only - exist after register allocation. -

    -

  • RealReg: the field contains a number in the range 0 - to 7 denoting an integer x86 ("real") register on the host. The - number is the Intel encoding for integer registers. Such tags - only exist after register allocation. -

    -

  • ArchReg: the field contains a number in the range 0 - to 7 denoting an integer x86 register on the simulated CPU. In - reality this means a reference to one of the first 8 words of - VG_(baseBlock). Such tags can exist at any point in - the translation process. -

    -

  • Last, but not least, TempReg. The field contains the - number of one of an infinite set of virtual (integer) - registers. TempRegs are used everywhere throughout - the translation process; you can have as many as you want. The - register allocator maps as many as it can into - RealRegs and turns the rest into - SpillNos, so TempRegs should not exist - after the register allocation phase. -

    - TempRegs are always 32 bits long, even if the data - they hold is logically shorter. In that case the upper unused - bits are required, and, I think, generally assumed, to be zero. - TempRegs holding V bits for quantities shorter than - 32 bits are expected to have ones in the unused places, since a - one denotes "undefined". -

- - -

UCode instructions: type UInstr

- -

-UCode was carefully designed to make it possible to do register -allocation on UCode and then translate the result into x86 code -without needing any extra registers ... well, that was the original -plan, anyway. Things have gotten a little more complicated since -then. In what follows, UCode instructions are referred to as uinstrs, -to distinguish them from x86 instructions. Uinstrs of course have -uopcodes which are (naturally) different from x86 opcodes. - -

-A uinstr (type UInstr) contains -various fields, not all of which are used by any one uopcode: -

    -
  • Three 16-bit operand fields, val1, val2 - and val3. -

    -

  • Three tag fields, tag1, tag2 - and tag3. Each of these has a value of type - Tag, - and they describe what the val1, val2 - and val3 fields contain. -

    -

  • A 32-bit literal field. -

    -

  • Two FlagSets, specifying which x86 condition codes are - read and written by the uinstr. -

    -

  • An opcode byte, containing a value of type Opcode. -

    -

  • A size field, indicating the data transfer size (1/2/4/8/10) in - cases where this makes sense, or zero otherwise. -

    -

  • A condition-code field, which, for jumps, holds a - value of type Condcode, indicating the condition - which applies. The encoding is as it is in the x86 insn stream, - except we add a 17th value CondAlways to indicate - an unconditional transfer. -

    -

  • Various 1-bit flags, indicating whether this insn pertains to an - x86 CALL or RET instruction, whether a widening is signed or not, - etc. -
- -

-UOpcodes (type Opcode) are divided into two groups: those -necessary merely to express the functionality of the x86 code, and -extra uopcodes needed to express the instrumentation. The former -group contains: -

    -
  • GET and PUT, which move values from the - simulated CPU's integer registers (ArchRegs) into - TempRegs, and back. GETF and - PUTF do the corresponding thing for the simulated - %EFLAGS. There are no corresponding insns for the - FPU register stack, since we don't explicitly simulate its - registers. -

    -

  • LOAD and STORE, which, in RISC-like - fashion, are the only uinstrs able to interact with memory. -

    -

  • MOV and CMOV allow unconditional and - conditional moves of values between TempRegs. -

    -

  • ALU operations. Again in RISC-like fashion, these only operate on - TempRegs (before reg-alloc) or RealRegs - (after reg-alloc). These are: ADD, ADC, - AND, OR, XOR, - SUB, SBB, SHL, - SHR, SAR, ROL, - ROR, RCL, RCR, - NOT, NEG, INC, - DEC, BSWAP, CC2VAL and - WIDEN. WIDEN does signed or unsigned - value widening. CC2VAL is used to convert condition - codes into a value, zero or one. The rest are obvious. -

    - To allow for more efficient code generation, we bend slightly the - restriction at the start of the previous para: for - ADD, ADC, XOR, - SUB and SBB, we allow the first (source) - operand to also be an ArchReg, that is, one of the - simulated machine's registers. Also, many of these ALU ops allow - the source operand to be a literal. See - VG_(saneUInstr) for the final word on the allowable - forms of uinstrs. -

    -

  • LEA1 and LEA2 are not strictly - necessary, but facilitate better translations. They - record the fancy x86 addressing modes in a direct way, which - allows those amodes to be emitted back into the final - instruction stream more or less verbatim. -

    -

  • CALLM calls a machine-code helper, one of the methods - whose address is stored at some VG_(baseBlock) - offset. PUSH and POP move values - to/from TempReg to the real (Valgrind's) stack, and - CLEAR removes values from the stack. - CALLM_S and CALLM_E delimit the - boundaries of call setups and clearings, for the benefit of the - instrumentation passes. Getting this right is critical, and so - VG_(saneUCodeBlock) makes various checks on the use - of these uopcodes. -

    - It is important to understand that these uopcodes have nothing to - do with the x86 call, return, - push or pop instructions, and are not - used to implement them. Those guys turn into combinations of - GET, PUT, LOAD, - STORE, ADD, SUB, and - JMP. What these uopcodes support is calling of - helper functions such as VG_(helper_imul_32_64), - which do stuff which is too difficult or tedious to emit inline. -

    -

  • FPU, FPU_R and FPU_W. - Valgrind doesn't attempt to simulate the internal state of the - FPU at all. Consequently it only needs to be able to distinguish - FPU ops which read and write memory from those that don't, and - for those which do, it needs to know the effective address and - data transfer size. This is made easier because the x86 FP - instruction encoding is very regular, basically consisting of - 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode - for a memory FPU insn. So our FPU uinstr carries - the 16 bits in its val1 field. And - FPU_R and FPU_W carry 11 bits in that - field, together with the identity of a TempReg or - (later) RealReg which contains the address. -

    -

  • JIFZ is unique, in that it allows a control-flow - transfer which is not deemed to end a basic block. It causes a - jump to a literal (original) address if the specified argument - is zero. -

    -

  • Finally, INCEIP advances the simulated - %EIP by the specified literal amount. This supports - lazy %EIP updating, as described below. -
- -

-Stages 1 and 2 of the 6-stage translation process mentioned above -deal purely with these uopcodes, and no others. They are -sufficient to express pretty much all the x86 32-bit protected-mode -instruction set, at -least everything understood by a pre-MMX original Pentium (P54C). - -

-Stages 3, 4, 5 and 6 also deal with the following extra -"instrumentation" uopcodes. They are used to express all the -definedness-tracking and -checking machinery which valgrind does. In -later sections we show how to create checking code for each of the -uopcodes above. Note that these instrumentation uopcodes, although -some appearing complicated, have been carefully chosen so that -efficient x86 code can be generated for them. GNU superopt v2.5 did a -great job helping out here. Anyways, the uopcodes are as follows: - -

    -
  • GETV and PUTV are analogues to - GET and PUT above. They are identical - except that they move the V bits for the specified values back and - forth to TempRegs, rather than moving the values - themselves. -

    -

  • Similarly, LOADV and STOREV read and - write V bits from the synthesised shadow memory that Valgrind - maintains. In fact they do more than that, since they also do - address-validity checks, and emit complaints if the read/written - addresses are unaddressable. -

    -

  • TESTV, whose parameters are a TempReg - and a size, tests the V bits in the TempReg, at the - specified operation size (0/1/2/4 byte) and emits an error if any - of them indicate undefinedness. This is the only uopcode capable - of doing such tests. -

    -

  • SETV, whose parameters are also TempReg - and a size, makes the V bits in the TempReg indicate - definedness, at the specified operation size. This is usually - used to generate the correct V bits for a literal value, which is - of course fully defined. -

    -

  • GETVF and PUTVF are analogues to - GETF and PUTF. They move the single V - bit used to model definedness of %EFLAGS between its - home in VG_(baseBlock) and the specified - TempReg. -

    -

  • TAG1 denotes one of a family of unary operations on - TempRegs containing V bits. Similarly, - TAG2 denotes one in a family of binary operations on - V bits. -
- -

-These 10 uopcodes are sufficient to express Valgrind's entire -definedness-checking semantics. In fact most of the interesting magic -is done by the TAG1 and TAG2 -suboperations. - -

-First, however, I need to explain about V-vector operation sizes. -There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 -V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. -However there is also the mysterious size 0, which really means a -single V bit. Single V bits are used in various circumstances; in -particular, the definedness of %EFLAGS is modelled with a -single V bit. Now might be a good time to also point out that for -V bits, 1 means "undefined" and 0 means "defined". Similarly, for A -bits, 1 means "invalid address" and 0 means "valid address". This -seems counterintuitive (and so it is), but testing against zero on -x86s saves instructions compared to testing against all 1s, because -many ALU operations set the Z flag for free, so to speak. - -

-With that in mind, the tag ops are: - -

    -
  • (UNARY) Pessimising casts: VgT_PCast40, - VgT_PCast20, VgT_PCast10, - VgT_PCast01, VgT_PCast02 and - VgT_PCast04. A "pessimising cast" takes a V-bit - vector at one size, and creates a new one at another size, - pessimised in the sense that if any of the bits in the source - vector indicate undefinedness, then all the bits in the result - indicate undefinedness. In this case the casts are all to or from - a single V bit, so for example VgT_PCast40 is a - pessimising cast from 32 bits to 1, whereas - VgT_PCast04 simply copies the single source V bit - into all 32 bit positions in the result. Surprisingly, these ops - can all be implemented very efficiently. -

    - There are also the pessimising casts VgT_PCast14, - from 8 bits to 32, VgT_PCast12, from 8 bits to 16, - and VgT_PCast11, from 8 bits to 8. This last one - seems nonsensical, but in fact it isn't a no-op because, as - mentioned above, any undefined (1) bits in the source infect the - entire result. -

    -

  • (UNARY) Propagating undefinedness upwards in a word: - VgT_Left4, VgT_Left2 and - VgT_Left1. These are used to simulate the worst-case - effects of carry propagation in adds and subtracts. They return a - V vector identical to the original, except that if the original - contained any undefined bits, then it and all bits above it are - marked as undefined too. Hence the Left bit in the names. -

    -

  • (UNARY) Signed and unsigned value widening: - VgT_SWiden14, VgT_SWiden24, - VgT_SWiden12, VgT_ZWiden14, - VgT_ZWiden24 and VgT_ZWiden12. These - mimic the definedness effects of standard signed and unsigned - integer widening. Unsigned widening creates zero bits in the new - positions, so VgT_ZWiden* accordingly mark - those parts of their argument as defined. Signed widening copies - the sign bit into the new positions, so VgT_SWiden* - copies the definedness of the sign bit into the new positions. - Because 1 means undefined and 0 means defined, these operations - can (fascinatingly) be done by the same operations which they - mimic. Go figure. -

    -

  • (BINARY) Undefined-if-either-Undefined, - Defined-if-either-Defined: VgT_UifU4, - VgT_UifU2, VgT_UifU1, - VgT_UifU0, VgT_DifD4, - VgT_DifD2, VgT_DifD1. These do simple - bitwise operations on pairs of V-bit vectors, with - UifU giving undefined if either arg bit is - undefined, and DifD giving defined if either arg bit - is defined. Abstract interpretation junkies, if any make it this - far, may like to think of them as meets and joins (or is it joins - and meets) in the definedness lattices. -

    -

  • (BINARY; one value, one V bits) Generate argument improvement - terms for AND and OR: VgT_ImproveAND4_TQ, - VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, - VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, - VgT_ImproveOR1_TQ. These help out with AND and OR - operations. AND and OR have the inconvenient property that the - definedness of the result depends on the actual values of the - arguments as well as their definedness. At the bit level: -
    1 AND undefined = undefined, but -
    0 AND undefined = 0, and similarly -
    0 OR undefined = undefined, but -
    1 OR undefined = 1. -
    -

    - It turns out that gcc (quite legitimately) generates code which - relies on this fact, so we have to model it properly in order to - avoid flooding users with spurious value errors. The ultimate - definedness result of AND and OR is calculated using - UifU on the definedness of the arguments, but we - also DifD in some "improvement" terms which - take into account the above phenomena. -

    - ImproveAND takes as its first argument the actual - value of an argument to AND (the T) and the definedness of that - argument (the Q), and returns a V-bit vector which is defined (0) - for bits which have value 0 and are defined; this, when - DifD into the final result causes those bits to be - defined even if the corresponding bit in the other argument is undefined. -

    - The ImproveOR ops do the dual thing for OR - arguments. Note that XOR does not have this property that one - argument can make the other irrelevant, so there is no need for - such complexity for XOR. -

- -

-That's all the tag ops. If you stare at this long enough, and then -run Valgrind and stare at the pre- and post-instrumented ucode, it -should be fairly obvious how the instrumentation machinery hangs -together. - -

-One point, if you do this: in order to make it easy to differentiate -TempRegs carrying values from TempRegs -carrying V bit vectors, Valgrind prints the former as (for example) -t28 and the latter as q28; the fact that -they carry the same number serves to indicate their relationship. -This is purely for the convenience of the human reader; the register -allocator and code generator don't regard them as different. - - -

Translation into UCode

- -VG_(disBB) allocates a new UCodeBlock and -then uses disInstr to translate x86 instructions one at a -time into UCode, dumping the result in the UCodeBlock. -This goes on until a control-flow transfer instruction is encountered. - -

-Despite the large size of vg_to_ucode.c, this translation -is really very simple. Each x86 instruction is translated entirely -independently of its neighbours, merrily allocating new -TempRegs as it goes. The idea is to have a simple -translator -- in reality, no more than a macro-expander -- and the -- -resulting bad UCode translation is cleaned up by the UCode -optimisation phase which follows. To give you an idea of some x86 -instructions and their translations (this is a complete basic block, -as Valgrind sees it): -

-        0x40435A50:  incl %edx
-
-           0: GETL      %EDX, t0
-           1: INCL      t0  (-wOSZAP)
-           2: PUTL      t0, %EDX
-
-        0x40435A51:  movsbl (%edx),%eax
-
-           3: GETL      %EDX, t2
-           4: LDB       (t2), t2
-           5: WIDENL_Bs t2
-           6: PUTL      t2, %EAX
-
-        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
-
-           7: GETL      %EAX, t6
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t6,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-
-        0x40435A59:  jnz-8 0x40435A50
-
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

-Notice how the block always ends with an unconditional jump to the -next block. This is a bit unnecessary, but makes many things simpler. - -

- -Most x86 instructions turn into sequences of GET, -PUT, LEA1, LEA2, -LOAD and STORE. Some complicated ones -however rely on calling helper bits of code in -vg_helpers.S. The ucode instructions PUSH, -POP, CALLM, CALLM_S and -CALLM_E support this. The calling convention is somewhat -ad-hoc and is not the C calling convention. The helper routines must -save all integer registers, and the flags, that they use. Args are -passed on the stack underneath the return address, as usual, and if -result(s) are to be returned, it (they) are either placed in dummy arg -slots created by the ucode PUSH sequence, or just -overwrite the incoming args. - -

-In order that the instrumentation mechanism can handle calls to these -helpers, VG_(saneUCodeBlock) enforces the following -restrictions on calls to helpers: - -

    -
  • Each CALLM uinstr must be bracketed by a preceding - CALLM_S marker (dummy uinstr) and a trailing - CALLM_E marker. These markers are used by the - instrumentation mechanism later to establish the boundaries of the - PUSH, POP and CLEAR - sequences for the call. -

    -

  • PUSH, POP and CLEAR - may only appear inside sections bracketed by CALLM_S - and CALLM_E, and nowhere else. -

    -

  • In any such bracketed section, no two PUSH insns may - push the same TempReg. Dually, no two - POPs may pop the same TempReg. -

    -

  • Finally, although this is not checked, args should be removed from - the stack with CLEAR, rather than POPs - into a TempReg which is not subsequently used. This - is because the instrumentation mechanism assumes that all values - POPped from the stack are actually used. -
- -Some of the translations may appear to have redundant -TempReg-to-TempReg moves. This helps the -next phase, UCode optimisation, to generate better code. - - - -

UCode optimisation

- -UCode is then subjected to an improvement pass -(vg_improve()), which blurs the boundaries between the -translations of the original x86 instructions. It's pretty -straightforward. Three transformations are done: - -
    -
  • Redundant GET elimination. Actually, more general - than that -- eliminates redundant fetches of ArchRegs. In our - running example, uinstr 3 GETs %EDX into - t2 despite the fact that, by looking at the previous - uinstr, it is already in t0. The GET is - therefore removed, and t2 renamed to t0. - Assuming t0 is allocated to a host register, it means - the simulated %EDX will exist in a host CPU register - for more than one simulated x86 instruction, which seems to me to - be a highly desirable property. -

    - There is some mucking around to do with subregisters; - %AL vs %AH %AX vs - %EAX etc. I can't remember how it works, but in - general we are very conservative, and these tend to invalidate the - caching. -

    -

  • Redundant PUT elimination. This annuls - PUTs of values back to simulated CPU registers if a - later PUT would overwrite the earlier - PUT value, and there are no intervening reads of the - simulated register (ArchReg). -

    - As before, we are paranoid when faced with subregister references. - Also, PUTs of %ESP are never annulled, - because it is vital the instrumenter always has an up-to-date - %ESP value available, since %ESP changes - affect addressability of the memory around the simulated stack - pointer. -

    - The implication of the above paragraph is that the simulated - machine's registers are only lazily updated once the above two - optimisation phases have run, with the exception of - %ESP. TempRegs go dead at the end of - every basic block, from which it is inferable that any - TempReg caching a simulated CPU reg is flushed (back - into the relevant VG_(baseBlock) slot) at the end of - every basic block. The further implication is that the simulated - registers are only up-to-date in between basic blocks, and not - at arbitrary points inside basic blocks. And the consequence of - that is that we can only deliver signals to the client in between - basic blocks. None of this seems any problem in practice. -

    -

  • Finally there is a simple def-use thing for condition codes. If - an earlier uinstr writes the condition codes, and the next uinsn - along which actually cares about the condition codes writes the - same or larger set of them, but does not read any, the earlier - uinsn is marked as not writing any condition codes. This saves - a lot of redundant cond-code saving and restoring. -
- -The effect of these transformations on our short block is rather -unexciting, and shown below. On longer basic blocks they can -dramatically improve code quality. - -
-at 3: delete GET, rename t2 to t0 in (4 .. 6)
-at 7: delete GET, rename t6 to t0 in (8 .. 9)
-at 1: annul flag write OSZAP due to later OSZACP
-
-Improved code:
-           0: GETL      %EDX, t0
-           1: INCL      t0
-           2: PUTL      t0, %EDX
-           4: LDB       (t0), t0
-           5: WIDENL_Bs t0
-           6: PUTL      t0, %EAX
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t0,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

UCode instrumentation

- -Once you understand the meaning of the instrumentation uinstrs, -discussed in detail above, the instrumentation scheme is fairly -straightforward. Each uinstr is instrumented in isolation, and the -instrumentation uinstrs are placed before the original uinstr. -Our running example continues below. I have placed a blank line -after every original ucode, to make it easier to see which -instrumentation uinstrs correspond to which originals. - -

-As mentioned somewhere above, TempRegs carrying values -have names like t28, and each one has a shadow carrying -its V bits, with names like q28. This pairing aids in -reading instrumented ucode. - -

- -One decision about all this is where to have "observation points", -that is, where to check that V bits are valid. I use a minimalistic -scheme, only checking where a failure of validity could cause the -original program to (seg)fault. So the use of values as memory -addresses causes a check, as do conditional jumps (these cause a check -on the definedness of the condition codes). And arguments -PUSHed for helper calls are checked, hence the weird -restrictions on helper call preambles described above. - -

-Another decision is that once a value is tested, it is thereafter -regarded as defined, so that we do not emit multiple undefined-value -errors for the same undefined value. That means that -TESTV uinstrs are always followed by SETV -on the same (shadow) TempRegs. Most of these -SETVs are redundant and are removed by the -post-instrumentation cleanup phase. - -

- -The instrumentation for calling helper functions deserves further -comment. The definedness of results from a helper is modelled using -just one V bit. So, in short, we do pessimising casts of the -definedness of all the args, down to a single bit, and then -UifU these bits together. So this single V bit will say -"undefined" if any part of any arg is undefined. This V bit is then -pessimally cast back up to the result(s) sizes, as needed. If, by -seeing that all the args are got rid of with CLEAR and -none with POP, Valgrind sees that the result of the call -is not actually used, it immediately examines the result V bit with a -TESTV -- SETV pair. If it did not do this, -there would be no observation point to detect that some of the -args to the helper were undefined. Of course, if the helper's results -are indeed used, we don't do this, since the result usage will -presumably cause the result definedness to be checked at some suitable -future point. - -

- -In general Valgrind tries to track definedness on a bit-for-bit basis, -but as the above para shows, for calls to helpers we throw in the -towel and approximate down to a single bit. This is because it's too -complex and difficult to track bit-level definedness through complex -ops such as integer multiply and divide, and in any case there are no -reasonable code fragments which attempt to (eg) multiply two -partially-defined values and end up with something meaningful, so -there seems little point in modelling multiplies, divides, etc, in -that level of detail. - -

-Integer loads and stores are instrumented with firstly a test of the -definedness of the address, followed by a LOADV or -STOREV respectively. These turn into calls to -(for example) VG_(helperc_LOADV4). These helpers do two -things: they perform an address-valid check, and they load or store V -bits from/to the relevant address in the (simulated V-bit) memory. - -

-FPU loads and stores are different. As above the definedness of the -address is first tested. However, the helper routine for FPU loads -(VGM_(fpu_read_check)) emits an error if either the -address is invalid or the referenced area contains undefined values. -It has to do this because we do not simulate the FPU at all, and so -cannot track definedness of values loaded into it from memory, so we -have to check them as soon as they are loaded into the FPU, ie, at -this point. We notionally assume that everything in the FPU is -defined. - -

-It follows therefore that FPU writes first check the definedness of -the address, then the validity of the address, and finally mark the -written bytes as well-defined. - -

-If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest -you use the same trick. It works provided that the FPU/MMX unit is -not used merely as a conduit to copy partially undefined data from -one place in memory to another. Unfortunately the integer CPU is used -like that (when copying C structs with holes, for example) and this is -the cause of much of the elaborateness of the instrumentation here -described. - -

-vg_instrument() in vg_translate.c actually -does the instrumentation. There are comments explaining how each -uinstr is handled, so we do not repeat that here. As explained -already, it is bit-accurate, except for calls to helper functions. -Unfortunately the x86 insns bt/bts/btc/btr are done by -helper fns, so bit-level accuracy is lost there. This should be fixed -by doing them inline; it will probably require adding a couple new -uinstrs. Also, left and right rotates through the carry flag (x86 -rcl and rcr) are approximated via a single -V bit; so far this has not caused anyone to complain. The -non-carry rotates, rol and ror, are much -more common and are done exactly. Re-visiting the instrumentation for -AND and OR, they seem rather verbose, and I wonder if it could be done -more concisely now. - -

-The lowercase o on many of the uopcodes in the running -example indicates that the size field is zero, usually meaning a -single-bit operation. - -

-Anyroads, the post-instrumented version of our running example looks -like this: - -

-Instrumented code:
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           7: SETVL     q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          22: SETVL     q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          25: SETVB     q12
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          29: TAG2o     q10 = UifU1 ( q12, q10 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          31: MOVL      q12, q14
-          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-
-          39: GETVFo    q18
-          40: TESTVo    q18
-          41: SETVo     q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

UCode post-instrumentation cleanup

- -

-This pass, coordinated by vg_cleanup(), removes redundant -definedness computation created by the simplistic instrumentation -pass. It consists of two passes, -vg_propagate_definedness() followed by -vg_delete_redundant_SETVs. - -

-vg_propagate_definedness() is a simple -constant-propagation and constant-folding pass. It tries to determine -which TempRegs containing V bits will always indicate -"fully defined", and it propagates this information as far as it can, -and folds out as many operations as possible. For example, the -instrumentation for an ADD of a literal to a variable quantity will be -reduced down so that the definedness of the result is simply the -definedness of the variable quantity, since the literal is by -definition fully defined. - -

-vg_delete_redundant_SETVs removes SETVs on -shadow TempRegs for which the next action is a write. -I don't think there's anything else worth saying about this; it is -simple. Read the sources for details. - -

-So the cleaned-up running example looks like this. As above, I have -inserted line breaks after every original (non-instrumentation) uinstr -to aid readability. As with straightforward ucode optimisation, the -results in this block are undramatic because it is so short; longer -blocks benefit more because they have more redundancy which gets -eliminated. - - -

-at 29: delete UifU1 due to defd arg1
-at 32: change ImproveAND1_TQ to MOV due to defd arg2
-at 41: delete SETV
-at 31: delete MOV
-at 25: delete SETV
-at 22: delete SETV
-at 7: delete SETV
-
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          32: MOVL      t12, q14
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-          39: GETVFo    q18
-          40: TESTVo    q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

Translation from UCode

- -This is all very simple, even though vg_from_ucode.c -is a big file. Position-independent x86 code is generated into -a dynamically allocated array emitted_code; this is -doubled in size when it overflows. Eventually the array is handed -back to the caller of VG_(translate), who must copy -the result into TC and TT, and free the array. - -

-This file is structured into four layers of abstraction, which, -thankfully, are glued back together with extensive -__inline__ directives. From the bottom upwards: - -

    -
  • Address-mode emitters, emit_amode_regmem_reg et al. -

    -

  • Emitters for specific x86 instructions. There are quite a lot of - these, with names such as emit_movv_offregmem_reg. - The v suffix is Intel parlance for a 16/32 bit insn; - there are also b suffixes for 8 bit insns. -

    -

  • The next level up are the synth_* functions, which - synthesise possibly a sequence of raw x86 instructions to do some - simple task. Some of these are quite complex because they have to - work around Intel's silly restrictions on subregister naming. See - synth_nonshiftop_reg_reg for example. -

    -

  • Finally, at the top of the heap, we have - emitUInstr(), - which emits code for a single uinstr. -
- -

-Some comments: -

    -
  • The hack for FPU instructions becomes apparent here. To do a - FPU ucode instruction, we load the simulated FPU's - state from its VG_(baseBlock) into the real FPU - using an x86 frstor insn, do the ucode - FPU insn on the real CPU, and write the updated FPU - state back into VG_(baseBlock) using an - fnsave instruction. This is pretty brutal, but is - simple and it works, and even seems tolerably efficient. There is - no attempt to cache the simulated FPU state in the real FPU over - multiple back-to-back ucode FPU instructions. -

    - FPU_R and FPU_W are also done this way, - with the minor complication that we need to patch in some - addressing mode bits so the resulting insn knows the effective - address to use. This is easy because of the regularity of the x86 - FPU instruction encodings. -

    -

  • An analogous trick is done with ucode insns which claim, in their - flags_r and flags_w fields, that they - read or write the simulated %EFLAGS. For such cases - we first copy the simulated %EFLAGS into the real - %eflags, then do the insn, then, if the insn says it - writes the flags, copy back to %EFLAGS. This is a - bit expensive, which is why the ucode optimisation pass goes to - some effort to remove redundant flag-update annotations. -
- -

-And so ... that's the end of the documentation for the instrumenting -translator! It's really not that complex, because it's composed as a -sequence of simple(ish) self-contained transformations on -straight-line blocks of code. - - -

Top-level dispatch loop

- -Urk. In VG_(toploop). This is basically boring and -unsurprising, not to mention fiddly and fragile. It needs to be -cleaned up. - -

-The only perhaps surprise is that the whole thing is run -on top of a setjmp-installed exception handler, because, -supposing a translation got a segfault, we have to bail out of the -Valgrind-supplied exception handler VG_(oursignalhandler) -and immediately start running the client's segfault handler, if it has -one. In particular we can't finish the current basic block and then -deliver the signal at some convenient future point, because signals -like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not -simply be re-tried. (I'm sure there is a clearer way to explain this). - - -

Exceptions, creating new translations

-

Self-modifying code

- -

Lazy updates of the simulated program counter

- -Simulated %EIP is not updated after every simulated x86 -insn as this was regarded as too expensive. Instead ucode -INCEIP insns move it along as and when necessary. -Currently we don't allow it to fall more than 4 bytes behind reality -(see VG_(disBB) for the way this works). -

-Note that %EIP is always brought up to date by the inner -dispatch loop in VG_(dispatch), so that if the client -takes a fault we know at least which basic block this happened in. - - -

The translation cache and translation table

- -

Signals

- -Horrible, horrible. vg_signals.c. -Basically, since we have to intercept all system -calls anyway, we can see when the client tries to install a signal -handler. If it does so, we make a note of what the client asked to -happen, and ask the kernel to route the signal to our own signal -handler, VG_(oursignalhandler). This simply notes the -delivery of signals, and returns. - -

-Every 1000 basic blocks, we see if more signals have arrived. If so, -VG_(deliver_signals) builds signal delivery frames on the -client's stack, and allows their handlers to be run. Valgrind places -in these signal delivery frames a bogus return address, -VG_(signalreturn_bogusRA), and checks all jumps to see -if any jump to it. If so, this is a sign that a signal handler is -returning, and if so Valgrind removes the relevant signal frame from -the client's stack, restores from the signal frame the simulated -state before the signal was delivered, and allows the client to run -onwards. We have to do it this way because some signal handlers never -return, they just longjmp(), which nukes the signal -delivery frame. - -

-The Linux kernel has a different but equally horrible hack for -detecting signal handler returns. Discovering it is left as an -exercise for the reader. - - - -

Errors, error contexts, error reporting, suppressions

-

Client malloc/free

-

Low-level memory management

-

A and V bitmaps

-

Symbol table management

-

Dealing with system calls

-

Namespace management

-

GDB attaching

-

Non-dependence on glibc or anything else

-

The leak detector

-

Performance problems

-

Continuous sanity checking

-

Tracing, or not tracing, child processes

-

Assembly glue for syscalls

- - -
- -

Extensions

- -Some comments about Stuff To Do. - -

Bugs

- -Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS -(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has -it looping at startup. I can't repro either behaviour. Needs -repro-ing and fixing. - - -

Threads

- -Doing a good job of thread support strikes me as almost a -research-level problem. The central issues are how to do fast cheap -locking of the VG_(primary_map) structure, whether or not -accesses to the individual secondary maps need locking, what -race-condition issues result, and whether the already-nasty mess that -is the signal simulator needs further hackery. - -

-I realise that threads are the most-frequently-requested feature, and -I am thinking about it all. If you have guru-level understanding of -fast mutual exclusion mechanisms and race conditions, I would be -interested in hearing from you. - - -

Verification suite

- -Directory tests/ contains various ad-hoc tests for -Valgrind. However, there is no systematic verification or regression -suite, that, for example, exercises all the stuff in -vg_memory.c, to ensure that illegal memory accesses and -undefined value uses are detected as they should be. It would be good -to have such a suite. - - -

Porting to other platforms

- -It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, -and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style -executables, not ELF ?) - -

-The main difficulties, for an x86-ELF platform, seem to be: - -

    -
  • You'd need to rewrite the /proc/self/maps parser - (vg_procselfmaps.c). - Easy. -

    -

  • You'd need to rewrite vg_syscall_mem.c, or, more - specifically, provide one for your OS. This is tedious, but you - can implement syscalls on demand, and the Linux kernel interface - is, for the most part, going to look very similar to the *BSD - interfaces, so it's really a copy-paste-and-modify-on-demand job. - As part of this, you'd need to supply a new - vg_kerneliface.h file. -

    -

  • You'd also need to change the syscall wrappers for Valgrind's - internal use, in vg_mylibc.c. -
- -All in all, I think a port to x86-ELF *BSDs is not really very -difficult, and in some ways I would like to see it happen, because -that would force a more clear factoring of Valgrind into platform -dependent and independent pieces. Not to mention, *BSD folks also -deserve to use Valgrind just as much as the Linux crew do. - - -

-


- -

Easy stuff which ought to be done

- -

MMX instructions

- -MMX insns should be supported, using the same trick as for FPU insns. -If the MMX registers are not used to copy uninitialised junk from one -place to another in memory, this means we don't have to actually -simulate the internal MMX unit state, so the FPU hack applies. This -should be fairly easy. - - - -

Fix stabs-info reader

- -The machinery in vg_symtab2.c which reads "stabs" style -debugging info is pretty weak. It usually correctly translates -simulated program counter values into line numbers and procedure -names, but the file name is often completely wrong. I think the -logic used to parse "stabs" entries is weak. It should be fixed. -The simplest solution, IMO, is to copy either the logic or simply the -code out of GNU binutils which does this; since GDB can clearly get it -right, binutils (or GDB?) must have code to do this somewhere. - - - - - -

BT/BTC/BTS/BTR

- -These are x86 instructions which test, complement, set, or reset, a -single bit in a word. At the moment they are both incorrectly -implemented and incorrectly instrumented. - -

-The incorrect instrumentation is due to use of helper functions. This -means we lose bit-level definedness tracking, which could wind up -giving spurious uninitialised-value use errors. The Right Thing to do -is to invent a couple of new UOpcodes, I think GET_BIT -and SET_BIT, which can be used to implement all 4 x86 -insns, get rid of the helpers, and give bit-accurate instrumentation -rules for the two new UOpcodes. - -

-I realised the other day that they are mis-implemented too. The x86 -insns take a bit-index and a register or memory location to access. -For registers the bit index clearly can only be in the range zero to -register-width minus 1, and I assumed the same applied to memory -locations too. But evidently not; for memory locations the index can -be arbitrary, and the processor will index arbitrarily into memory as -a result. This too should be fixed. Sigh. Presumably indexing -outside the immediate word is not actually used by any programs yet -tested on Valgrind, for otherwise they (presumably) would simply not -work at all. If you plan to hack on this, first check the Intel docs -to make sure my understanding is really correct. - - - -

Using PREFETCH instructions

- -Here's a small but potentially interesting project for performance -junkies. Experiments with valgrind's code generator and optimiser(s) -suggest that reducing the number of instructions executed in the -translations and mem-check helpers gives disappointingly small -performance improvements. Perhaps this is because performance of -Valgrindified code is limited by cache misses. After all, each read -in the original program now gives rise to at least three reads, one -for the VG_(primary_map), one of the resulting -secondary, and the original. Not to mention, the instrumented -translations are 13 to 14 times larger than the originals. All in all -one would expect the memory system to be hammered to hell and then -some. - -

-So here's an idea. An x86 insn involving a read from memory, after -instrumentation, will turn into ucode of the following form: -

-    ... calculate effective addr, into ta and qa ...
-    TESTVL qa             -- is the addr defined?
-    LOADV (ta), qloaded   -- fetch V bits for the addr
-    LOAD  (ta), tloaded   -- do the original load
-
-At the point where the LOADV is done, we know the actual -address (ta) from which the real LOAD will -be done. We also know that the LOADV will take around -20 x86 insns to do. So it seems plausible that doing a prefetch of -ta just before the LOADV might just avoid a -miss at the LOAD point, and that might be a significant -performance win. - -

-Prefetch insns are notoriously temperamental, more often than not -making things worse rather than better, so this would require -considerable fiddling around. It's complicated because Intels and -AMDs have different prefetch insns with different semantics, so that -too needs to be taken into account. As a general rule, even placing -the prefetches before the LOADV insn is too near the -LOAD; the ideal distance is apparently circa 200 CPU -cycles. So it might be worth having another analysis/transformation -pass which pushes prefetches as far back as possible, hopefully -immediately after the effective address becomes available. - -

-Doing too many prefetches is also bad because they soak up bus -bandwidth / cpu resources, so some cleverness in deciding which loads -to prefetch and which to not might be helpful. One can imagine not -prefetching client-stack-relative (%EBP or -%ESP) accesses, since the stack in general tends to show -good locality anyway. - -

-There's quite a lot of experimentation to do here, but I think it -might make an interesting week's work for someone. - -

-As of 15-ish March 2002, I've started to experiment with this, using -the AMD prefetch/prefetchw insns. - - - -

User-defined permission ranges

- -This is quite a large project -- perhaps a month's hacking for a -capable hacker to do a good job -- but it's potentially very -interesting. The outcome would be that Valgrind could detect a -whole class of bugs which it currently cannot. - -

-The presentation falls into two pieces. - -

-Part 1: user-defined address-range permission setting -

- -Valgrind intercepts the client's malloc, -free, etc calls, watches system calls, and watches the -stack pointer move. This is currently the only way it knows about -which addresses are valid and which not. Sometimes the client program -knows extra information about its memory areas. For example, the -client could at some point know that all elements of an array are -out-of-date. We would like to be able to convey to Valgrind this -information that the array is now addressable-but-uninitialised, so -that Valgrind can then warn if elements are used before they get new -values. - -

-What I would like are some macros like this: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-   VALGRIND_MAKE_WRITABLE(addr, len)
-   VALGRIND_MAKE_READABLE(addr, len)
-
-and also, to check that memory is addressable/initialised, -
-   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
-   VALGRIND_CHECK_INITIALISED(addr, len)
-
- -

-I then include in my sources a header defining these macros, rebuild -my app, run under Valgrind, and get user-defined checks. - -

-Now here's a neat trick. It's a nuisance to have to re-link the app -with some new library which implements the above macros. So the idea -is to define the macros so that the resulting executable is still -completely stand-alone, and can be run without Valgrind, in which case -the macros do nothing, but when run on Valgrind, the Right Thing -happens. How to do this? The idea is for these macros to turn into a -piece of inline assembly code, which (1) has no effect when run on the -real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane -person would ever write, which is important for avoiding false matches -in (2). So here's a suggestion: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-
-becomes (roughly speaking) -
-   movl addr, %eax
-   movl len,  %ebx
-   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
-                     -- 2, etc
-   rorl $13, %ecx
-   rorl $19, %ecx
-   rorl $11, %eax
-   rorl $21, %eax
-
-The rotate sequences have no effect, and it's unlikely they would -appear for any other reason, but they define a unique byte-sequence -which the JITter can easily spot. Using the operand constraints -section at the end of a gcc inline-assembly statement, we can tell gcc -that the assembly fragment kills %eax, %ebx, -%ecx and the condition codes, so this fragment is made -harmless when not running on Valgrind, runs quickly when not on -Valgrind, and does not require any other library support. - - -

-Part 2: using it to detect interference between stack variables -

- -Currently Valgrind cannot detect errors of the following form: -

-void fooble ( void )
-{
-   int a[10];
-   int b[10];
-   a[10] = 99;
-}
-
-Now imagine rewriting this as -
-void fooble ( void )
-{
-   int spacer0;
-   int a[10];
-   int spacer1;
-   int b[10];
-   int spacer2;
-   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
-   a[10] = 99;
-}
-
-Now the invalid write is certain to hit spacer0 or -spacer1, so Valgrind will spot the error. - -

-There are two complications. - -

-The first is that we don't want to annotate sources by hand, so the -Right Thing to do is to write a C/C++ parser, annotator, prettyprinter -which does this automatically, and run it on post-CPP'd C/C++ source. -See http://www.cacheprof.org for an example of a system which -transparently inserts another phase into the gcc/g++ compilation -route. The parser/prettyprinter is probably not as hard as it sounds; -I would write it in Haskell, a powerful functional language well -suited to doing symbolic computation, with which I am intimately -familiar. There is already a C parser written in Haskell by someone in -the Haskell community, and that would probably be a good starting -point. - -

-The second complication is how to get rid of these -NOACCESS records inside Valgrind when the instrumented -function exits; after all, these refer to stack addresses and will -make no sense whatever when some other function happens to re-use the -same stack address range, probably shortly afterwards. I think I -would be inclined to define a special stack-specific macro -

-   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
-
-which causes Valgrind to record the client's %ESP at the -time it is executed. Valgrind will then watch for changes in -%ESP and discard such records as soon as the protected -area is uncovered by an increase in %ESP. I hesitate -with this scheme only because it is potentially expensive, if there -are hundreds of such records, and considering that changes in -%ESP already require expensive messing with stack access -permissions. - -

-This is probably easier and more robust than for the instrumenter -program to try and spot all exit points for the procedure and place -suitable deallocation annotations there. Plus C++ procedures can -bomb out at any point if they get an exception, so spotting return -points at the source level just won't work at all. - -

-Although some work, it's all eminently doable, and it would make -Valgrind into an even-more-useful tool. - - -

- - -


- -

Cache profiling

-Valgrind is a very nice platform for doing cache profiling and other kinds of -simulation, because it converts horrible x86 instructions into nice clean -RISC-like UCode. For example, for cache profiling we are interested in -instructions that read and write memory; in UCode there are only four -instructions that do this: LOAD, STORE, -FPU_R and FPU_W. By contrast, because of the x86 -addressing modes, almost every instruction can read or write memory.

- -Most of the cache profiling machinery is in the file -vg_cachesim.c.

- -These notes are a somewhat haphazard guide to how Valgrind's cache profiling -works.

- -

Cost centres

-Valgrind gathers cache profiling about every instruction executed, -individually. Each instruction has a cost centre associated with it. -There are two kinds of cost centre: one for instructions that don't reference -memory (iCC), and one for instructions that do -(idCC): - -
-typedef struct _CC {
-   ULong a;
-   ULong m1;
-   ULong m2;
-} CC;
-
-typedef struct _iCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-} iCC;
-   
-typedef struct _idCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   UChar data_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I; 
-   CC D; 
-} idCC; 
-
- -Each CC has three fields a, m1, -m2 for recording references, level 1 misses and level 2 misses. -Each of these is a 64-bit ULong -- the numbers can get very large, -ie. greater than 4.2 billion allowed by a 32-bit unsigned int.

- -An iCC has one CC for instruction cache accesses. An -idCC has two, one for instruction cache accesses, and one for data -cache accesses.

- -The iCC and idCC structs also store unchanging -information about the instruction: -

    -
  • An instruction-type identification tag (explained below)
  • -

  • Instruction size
  • -

  • Data reference size (idCC only)
  • -

  • Instruction address
  • -

- -Note that data address is not one of the fields for idCC. This is -because for many memory-referencing instructions the data address can change -each time it's executed (eg. if it uses register-offset addressing). We have -to give this item to the cache simulation in a different way (see -Instrumentation section below). Some memory-referencing instructions do always -reference the same address, but we don't try to treat them specially in order to -keep things simple.

- -Also note that there is only room for recording info about one data cache -access in an idCC. So what about instructions that do a read then -a write, such as: - -

inc (%esi)
- -In a write-allocate cache, as simulated by Valgrind, the write cannot miss, -since it immediately follows the read which will drag the block into the cache -if it's not already there. So the write access isn't really interesting, and -Valgrind doesn't record it. This means that Valgrind doesn't measure -memory references, but rather memory references that could miss in the cache. -This behaviour is the same as that used by the AMD Athlon hardware counters. -It also has the benefit of simplifying the implementation -- instructions that -read and write memory can be treated like instructions that read memory.

- -

Storing cost-centres

-Cost centres are stored in a way that makes them very cheap to lookup, which is -important since one is looked up for every original x86 instruction -executed.

- -Valgrind does JIT translations at the basic block level, and cost centres are -also setup and stored at the basic block level. By doing things carefully, we -store all the cost centres for a basic block in a contiguous array, and lookup -comes almost for free.

- -Consider this part of a basic block (for exposition purposes, pretend it's an -entire basic block): - -

-movl $0x0,%eax
-movl $0x99, -4(%ebp)
-
- -The translation to UCode looks like this: - -
-MOVL      $0x0, t20
-PUTL      t20, %EAX
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-STL       t18, (t14)
-INCEIPo   $7
-
- -The first step is to allocate the cost centres. This requires a preliminary -pass to count how many x86 instructions were in the basic block, and their -types (and thus sizes). UCode translations for single x86 instructions are -delimited by the INCEIPo instruction, the argument of which gives -the byte size of the instruction (note that lazy INCEIP updating is turned off -to allow this).

- -We can tell if an x86 instruction references memory by looking for -LDL and STL UCode instructions, and thus what kind of -cost centre is required. From this we can determine how many cost centres we -need for the basic block, and their sizes. We can then allocate them in a -single array.

- -Consider the example code above. After the preliminary pass, we know we need -two cost centres, one iCC and one idCC. So we -allocate an array to store these which looks like this: - -

-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-
-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-|(uninit)|      D.a         (8 bytes)
-|(uninit)|      D.m1        (8 bytes)
-|(uninit)|      D.m2        (8 bytes)
-
- -(We can see now why we need tags to distinguish between the two types of cost -centres.)

- -We also record the size of the array. We look up the debug info of the first -instruction in the basic block, and then stick the array into a table indexed -by filename and function name. This makes it easy to dump the information -quickly to file at the end.

- -

Instrumentation

-The instrumentation pass has two main jobs: - -
    -
  1. Fill in the gaps in the allocated cost centres.
  2. -

  3. Add UCode to call the cache simulator for each instruction.
  4. -

- -The instrumentation pass steps through the UCode and the cost centres in -tandem. As each original x86 instruction's UCode is processed, the appropriate -gaps in the instruction's cost centre are filled in, for example: - -
-|INSTR_CC|      tag         (1 byte)
-|5       |      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|i_addr1 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-
-|WRITE_CC|      tag         (1 byte)
-|7       |      instr_size  (1 byte)
-|4       |      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|i_addr2 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-|0       |      D.a         (8 bytes)
-|0       |      D.m1        (8 bytes)
-|0       |      D.m2        (8 bytes)
-
- -(Note that this step is not performed if a basic block is re-translated; see -here for more information.)

- -GCC inserts padding before the instr_addr field so that it is word -aligned.

- -The instrumentation added to call the cache simulation function looks like this -(instrumentation is indented to distinguish it from the original UCode): - -

-MOVL      $0x0, t20
-PUTL      t20, %EAX
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  MOVL      $0x4091F8A4, t46  # address of 1st CC
-  PUSHL     t46
-  CALLMo    $0x12             # first cachesim function
-  CLEARo    $0x4
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-  MOVL      t14, t42
-STL       t18, (t14)
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  PUSHL     t42
-  MOVL      $0x4091F8C4, t44  # address of 2nd CC
-  PUSHL     t44
-  CALLMo    $0x13             # second cachesim function
-  CLEARo    $0x8
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $7
-
- -Consider the first instruction's UCode. Each call is surrounded by three -PUSHL and POPL instructions to save and restore the -caller-save registers. Then the address of the instruction's cost centre is -pushed onto the stack, to be the first argument to the cache simulation -function. The address is known at this point because we are doing a -simultaneous pass through the cost centre array. This means the cost centre -lookup for each instruction is almost free (just the cost of pushing an -argument for a function call). Then the call to the cache simulation function -for non-memory-reference instructions is made (note that the -CALLMo UInstruction takes an offset into a table of predefined -functions; it is not an absolute address), and the single argument is -CLEARed from the stack.

- -The second instruction's UCode is similar. The only difference is that, as -mentioned before, we have to pass the address of the data item referenced to -the cache simulation function too. This explains the MOVL t14, -t42 and PUSHL t42 UInstructions. (Note that the seemingly -redundant MOVing will probably be optimised away during register -allocation.)

- -Note that instead of storing unchanging information about each instruction -(instruction size, data size, etc) in its cost centre, we could have passed in -these arguments to the simulation function. But this would slow the calls down -(two or three extra arguments pushed onto the stack). Also it would bloat the -UCode instrumentation by amounts similar to the space required for them in the -cost centre; bloated UCode would also fill the translation cache more quickly, -requiring more translations for large programs and slowing them down more.

- - -

Handling basic block retranslations

-The above description ignores one complication. Valgrind has a limited size -cache for basic block translations; if it fills up, old translations are -discarded. If a discarded basic block is executed again, it must be -re-translated.

- -However, we can't use this approach for profiling -- we can't throw away cost -centres for instructions in the middle of execution! So when a basic block is -translated, we first look for its cost centre array in the hash table. If -there is no cost centre array, it must be the first translation, so we proceed -as described above. But if there is a cost centre array already, it must be a -retranslation. In this case, we skip the cost centre allocation and -initialisation steps, but still do the UCode instrumentation step.

- -

The cache simulation

-The cache simulation is fairly straightforward. It just tracks which memory -blocks are in the cache at the moment (it doesn't track the contents, since -that is irrelevant).

- -The interface to the simulation is quite clean. The functions called from the -UCode contain calls to the simulation functions in the files -vg_cachesim_{I1,D1,L2}.c; these calls are inlined so that only -one function call is done per simulated x86 instruction. The file -vg_cachesim.c simply #includes the three files -containing the simulation, which makes plugging in new cache simulations -very easy -- you just replace the three files and recompile.

- -

Output

-Output is fairly straightforward, basically printing the cost centre for every -instruction, grouped by files and functions. Total counts (eg. total cache -accesses, total L1 misses) are calculated when traversing this structure rather -than during execution, to save time; the cache simulation functions are called -so often that even one or two extra adds can make a sizeable difference.

- -Input file has the following format: - -

-file         ::= desc_line* cmd_line events_line data_line+ summary_line
-desc_line    ::= "desc:" ws? non_nl_string
-cmd_line     ::= "cmd:" ws? cmd
-events_line  ::= "events:" ws? (event ws)+
-data_line    ::= file_line | fn_line | count_line
-file_line    ::= ("fl=" | "fi=" | "fe=") filename
-fn_line      ::= "fn=" fn_name
-count_line   ::= line_num ws? (count ws)+
-summary_line ::= "summary:" ws? (count ws)+
-count        ::= num | "."
-
- -Where: - -
    -
  • non_nl_string is any string not containing a newline.
  • -

  • cmd is a command line invocation.
  • -

  • filename and fn_name can be anything.
  • -

  • num and line_num are decimal numbers.
  • -

  • ws is whitespace.
  • -

  • nl is a newline.
  • -

- -The contents of the "desc:" lines are printed out at the top of the summary. -This is a generic way of providing simulation specific information, eg. for -giving the cache configuration for cache simulation.

- -Counts can be "." to represent "N/A", eg. the number of write misses for an -instruction that doesn't write to memory.

- -The number of counts in each line and the -summary_line should not exceed the number of events in the -events_line. If the number in each line is less, -vg_annotate treats those missing as though they were a "." entry.

- -A file_line changes the current file name. A fn_line -changes the current function name. A count_line contains counts -that pertain to the current filename/fn_name. A "fl=" file_line -and a fn_line must appear before any count_lines to -give the context of the first count_lines.

- -Each file_line should be immediately followed by a -fn_line. "fi=" file_lines are used to switch -filenames for inlined functions; "fe=" file_lines are similar, but -are put at the end of a basic block in which the file name hasn't been switched -back to the original file name. (fi and fe lines behave the same; they are -only distinguished to help debugging.)

- - -

Summary of performance features

-Quite a lot of work has gone into making the profiling as fast as possible. -This is a summary of the important features: - -
    -
  • The basic block-level cost centre storage allows almost free cost centre - lookup.
  • - -

  • Only one function call is made per instruction simulated; even this - accounts for a sizeable percentage of execution time, but it seems - unavoidable if we want flexibility in the cache simulator.
  • - -

  • Unchanging information about an instruction is stored in its cost centre, - avoiding unnecessary argument pushing, and minimising UCode - instrumentation bloat.
  • - -

  • Summary counts are calculated at the end, rather than during - execution.
  • - -

  • The cachegrind.out output files can contain huge amounts of - information; the file format was carefully chosen to minimise file - sizes.
  • -

- - -

Annotation

-Annotation is done by vg_annotate. It is a fairly straightforward Perl script -that slurps up all the cost centres, and then runs through all the chosen -source files, printing out cost centres with them. It too has been carefully -optimised. - - -

Similar work, extensions

-It would be relatively straightforward to do other simulations and obtain -line-by-line information about interesting events. A good example would be -branch prediction -- all branches could be instrumented to interact with a -branch prediction simulator, using very similar techniques to those described -above.

- -In particular, vg_annotate would not need to change -- the file format is such -that it is not specific to the cache simulation, but could be used for any kind -of line-by-line information. The only part of vg_annotate that is specific to -the cache simulation is the name of the input file -(cachegrind.out), although it would be very simple to add an -option to control this.

- - - diff --git a/corecheck/Makefile.am b/corecheck/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/corecheck/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = 
$(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/coregrind/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . 
docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) 
-fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/coregrind/arch/x86-linux/vg_libpthread.c b/coregrind/arch/x86-linux/vg_libpthread.c deleted file mode 100644 index 60c4dc95ff..0000000000 --- a/coregrind/arch/x86-linux/vg_libpthread.c +++ /dev/null @@ -1,2850 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A replacement for the standard libpthread.so. ---*/ -/*--- vg_libpthread.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -/* ALL THIS CODE RUNS ON THE SIMULATED CPU. - - This is a replacement for the standard libpthread.so. It is loaded - as part of the client's image (if required) and directs pthread - calls through to Valgrind's request mechanism. - - A couple of caveats. - - 1. Since it's a binary-compatible replacement for an existing library, - we must take care to used exactly the same data layouts, etc, as - the standard pthread.so does. - - 2. Since this runs as part of the client, there are no specific - restrictions on what headers etc we can include, so long as - this libpthread.so does not end up having dependencies on .so's - which the real one doesn't. - - Later ... it appears we cannot call file-related stuff in libc here, - perhaps fair enough. Be careful what you call from here. Even exit() - doesn't work (gives infinite recursion and then stack overflow); hence - myexit(). Also fprintf doesn't seem safe. -*/ - -#include "valgrind.h" /* For the request-passing mechanism */ -#include "vg_include.h" /* For the VG_USERREQ__* constants */ - -#define __USE_UNIX98 -#include -#include -#undef __USE_UNIX98 - -#include -#include -#ifdef GLIBC_2_1 -#include -#endif - -#include - - -/* --------------------------------------------------------------------- - Forwardses. - ------------------------------------------------------------------ */ - -static void wait_for_fd_to_be_readable_or_erring ( int fd ); - -static -int my_do_syscall2 ( int syscallno, - int arg1, int arg2 ); - - -/* --------------------------------------------------------------------- - Helpers. We have to be pretty self-sufficient. 
- ------------------------------------------------------------------ */ - -/* Number of times any given error message is printed. */ -#define N_MOANS 3 - -/* Extract from Valgrind the value of VG_(clo_trace_pthread_level). - Returns 0 (none) if not running on Valgrind. */ -static -int get_pt_trace_level ( void ) -{ - int res; - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__GET_PTHREAD_TRACE_LEVEL, - 0, 0, 0, 0); - return res; -} - - -static -void my_exit ( int arg ) -{ - int __res; - __asm__ volatile ("movl %%ecx, %%ebx ; int $0x80" - : "=a" (__res) - : "0" (__NR_exit), - "c" (arg) ); - /* We don't bother to mention the fact that this asm trashes %ebx, - since it won't return. If you ever do let it return ... fix - this! */ -} - - -/* We need this guy -- it's in valgrind.so. */ -extern void VG_(startup) ( void ); - - -/* Just start up Valgrind if it's not already going. VG_(startup)() - detects and ignores second and subsequent calls. */ -static __inline__ -void ensure_valgrind ( char* caller ) -{ - VG_(startup)(); -} - -/* While we're at it ... hook our own startup function into this - game. */ -__asm__ ( - ".section .init\n" - "\tcall vgPlain_startup" -); - - -static -__attribute__((noreturn)) -void barf ( char* str ) -{ - char buf[100]; - buf[0] = 0; - strcat(buf, "\nvalgrind's libpthread.so: "); - strcat(buf, str); - strcat(buf, "\n\n"); - write(2, buf, strlen(buf)); - my_exit(1); - /* We have to persuade gcc into believing this doesn't return. 
*/ - while (1) { }; -} - - -static void ignored ( char* msg ) -{ - if (get_pt_trace_level() >= 0) { - char* ig = "valgrind's libpthread.so: IGNORED call to: "; - write(2, ig, strlen(ig)); - write(2, msg, strlen(msg)); - ig = "\n"; - write(2, ig, strlen(ig)); - } -} - -static void kludged ( char* msg ) -{ - if (get_pt_trace_level() >= 0) { - char* ig = "valgrind's libpthread.so: KLUDGED call to: "; - write(2, ig, strlen(ig)); - write(2, msg, strlen(msg)); - ig = "\n"; - write(2, ig, strlen(ig)); - } -} - -static void not_inside ( char* msg ) -{ - VG_(startup)(); -} - -__attribute__((noreturn)) -void vgPlain_unimp ( char* what ) -{ - char* ig = "valgrind's libpthread.so: UNIMPLEMENTED FUNCTION: "; - write(2, ig, strlen(ig)); - write(2, what, strlen(what)); - ig = "\n"; - write(2, ig, strlen(ig)); - barf("Please report this bug to me at: jseward@acm.org"); -} - - -static -void my_assert_fail ( Char* expr, Char* file, Int line, Char* fn ) -{ - static Bool entered = False; - if (entered) - my_exit(2); - entered = True; - fprintf(stderr, "\n%s: %s:%d (%s): Assertion `%s' failed.\n", - "valgrind", file, line, fn, expr ); - fprintf(stderr, "Please report this bug to me at: %s\n\n", - VG_EMAIL_ADDR); - my_exit(1); -} - -#define MY__STRING(__str) #__str - -#define my_assert(expr) \ - ((void) ((expr) ? 0 : \ - (my_assert_fail (MY__STRING(expr), \ - __FILE__, __LINE__, \ - __PRETTY_FUNCTION__), 0))) - - -/* --------------------------------------------------------------------- - Pass pthread_ calls to Valgrind's request mechanism. - ------------------------------------------------------------------ */ - -#include -#include /* gettimeofday */ - - -/* --------------------------------------------------- - Ummm .. 
- ------------------------------------------------ */ - -static -void pthread_error ( const char* msg ) -{ - int res; - VALGRIND_MAGIC_SEQUENCE(res, 0, - VG_USERREQ__PTHREAD_ERROR, - msg, 0, 0, 0); -} - - -/* --------------------------------------------------- - THREAD ATTRIBUTES - ------------------------------------------------ */ - -int pthread_attr_init(pthread_attr_t *attr) -{ - /* Just initialise the fields which we might look at. */ - attr->__detachstate = PTHREAD_CREATE_JOINABLE; - return 0; -} - -int pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate) -{ - if (detachstate != PTHREAD_CREATE_JOINABLE - && detachstate != PTHREAD_CREATE_DETACHED) { - pthread_error("pthread_attr_setdetachstate: " - "detachstate is invalid"); - return EINVAL; - } - attr->__detachstate = detachstate; - return 0; -} - -int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_setinheritsched"); - return 0; -} - -__attribute__((weak)) -int pthread_attr_setstacksize (pthread_attr_t *__attr, - size_t __stacksize) -{ - size_t limit; - char buf[1024]; - ensure_valgrind("pthread_attr_setstacksize"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - if (__stacksize < limit) - return 0; - snprintf(buf, sizeof(buf), "pthread_attr_setstacksize: " - "requested size %d >= VG_PTHREAD_STACK_SIZE\n " - "edit vg_include.h and rebuild.", __stacksize); - buf[sizeof(buf)-1] = '\0'; /* Make sure it is zero terminated */ - barf(buf); -} - - -/* This is completely bogus. 
*/ -int pthread_attr_getschedparam(const pthread_attr_t *attr, - struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - kludged("pthread_attr_getschedparam"); -# ifdef HAVE_SCHED_PRIORITY - if (param) param->sched_priority = 0; /* who knows */ -# else - if (param) param->__sched_priority = 0; /* who knows */ -# endif - return 0; -} - -int pthread_attr_setschedparam(pthread_attr_t *attr, - const struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_setschedparam"); - return 0; -} - -int pthread_attr_destroy(pthread_attr_t *attr) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_destroy"); - return 0; -} - -/* These are no-ops, as with LinuxThreads. */ -int pthread_attr_setscope ( pthread_attr_t *attr, int scope ) -{ - ensure_valgrind("pthread_attr_setscope"); - if (scope == PTHREAD_SCOPE_SYSTEM) - return 0; - pthread_error("pthread_attr_setscope: " - "invalid or unsupported scope"); - if (scope == PTHREAD_SCOPE_PROCESS) - return ENOTSUP; - return EINVAL; -} - -int pthread_attr_getscope ( const pthread_attr_t *attr, int *scope ) -{ - ensure_valgrind("pthread_attr_setscope"); - if (scope) - *scope = PTHREAD_SCOPE_SYSTEM; - return 0; -} - - -/* Pretty bogus. Avoid if possible. 
*/ -int pthread_getattr_np (pthread_t thread, pthread_attr_t *attr) -{ - int detached; - size_t limit; - ensure_valgrind("pthread_getattr_np"); - kludged("pthread_getattr_np"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - attr->__detachstate = PTHREAD_CREATE_JOINABLE; - attr->__schedpolicy = SCHED_OTHER; - attr->__schedparam.sched_priority = 0; - attr->__inheritsched = PTHREAD_EXPLICIT_SCHED; - attr->__scope = PTHREAD_SCOPE_SYSTEM; - attr->__guardsize = VKI_BYTES_PER_PAGE; - attr->__stackaddr = NULL; - attr->__stackaddr_set = 0; - attr->__stacksize = limit; - VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 2 /* get */, thread, 0, 0); - my_assert(detached == 0 || detached == 1); - if (detached) - attr->__detachstate = PTHREAD_CREATE_DETACHED; - return 0; -} - - -/* Bogus ... */ -int pthread_attr_getstackaddr ( const pthread_attr_t * attr, - void ** stackaddr ) -{ - ensure_valgrind("pthread_attr_getstackaddr"); - kludged("pthread_attr_getstackaddr"); - if (stackaddr) - *stackaddr = NULL; - return 0; -} - -/* Not bogus (!) */ -int pthread_attr_getstacksize ( const pthread_attr_t * _attr, - size_t * __stacksize ) -{ - size_t limit; - ensure_valgrind("pthread_attr_getstacksize"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - if (__stacksize) - *__stacksize = limit; - return 0; -} - -int pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy) -{ - if (policy != SCHED_OTHER && policy != SCHED_FIFO && policy != SCHED_RR) - return EINVAL; - attr->__schedpolicy = policy; - return 0; -} - -int pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy) -{ - *policy = attr->__schedpolicy; - return 0; -} - - -/* --------------------------------------------------- - Helper functions for running a thread - and for clearing up afterwards. 
- ------------------------------------------------ */ - -/* All exiting threads eventually pass through here, bearing the - return value, or PTHREAD_CANCELED, in ret_val. */ -static -__attribute__((noreturn)) -void thread_exit_wrapper ( void* ret_val ) -{ - int detached, res; - CleanupEntry cu; - pthread_key_t key; - - /* Run this thread's cleanup handlers. */ - while (1) { - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__CLEANUP_POP, - &cu, 0, 0, 0); - if (res == -1) break; /* stack empty */ - my_assert(res == 0); - if (0) printf("running exit cleanup handler"); - cu.fn ( cu.arg ); - } - - /* Run this thread's key finalizers. Really this should be run - PTHREAD_DESTRUCTOR_ITERATIONS times. */ - for (key = 0; key < VG_N_THREAD_KEYS; key++) { - VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */, - VG_USERREQ__GET_KEY_D_AND_S, - key, &cu, 0, 0 ); - if (res == 0) { - /* valid key */ - if (cu.fn && cu.arg) - cu.fn /* destructor for key */ - ( cu.arg /* specific for key for this thread */ ); - continue; - } - my_assert(res == -1); - } - - /* Decide on my final disposition. */ - VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 2 /* get */, pthread_self(), 0, 0); - my_assert(detached == 0 || detached == 1); - - if (detached) { - /* Detached; I just quit right now. */ - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__QUIT, 0, 0, 0, 0); - } else { - /* Not detached; so I wait for a joiner. */ - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__WAIT_JOINER, ret_val, 0, 0, 0); - } - /* NOTREACHED */ - barf("thread_exit_wrapper: still alive?!"); -} - - -/* This function is a wrapper function for running a thread. It runs - the root function specified in pthread_create, and then, should the - root function return a value, it arranges to run the thread's - cleanup handlers and exit correctly. */ - -/* Struct used to convey info from pthread_create to thread_wrapper. 
- Must be careful not to pass to the child thread any pointers to - objects which might be on the parent's stack. */ -typedef - struct { - int attr__detachstate; - void* (*root_fn) ( void* ); - void* arg; - } - NewThreadInfo; - - -/* This is passed to the VG_USERREQ__APPLY_IN_NEW_THREAD and so must - not return. Note that this runs in the new thread, not the - parent. */ -static -__attribute__((noreturn)) -void thread_wrapper ( NewThreadInfo* info ) -{ - int res; - int attr__detachstate; - void* (*root_fn) ( void* ); - void* arg; - void* ret_val; - - attr__detachstate = info->attr__detachstate; - root_fn = info->root_fn; - arg = info->arg; - - /* Free up the arg block that pthread_create malloced. */ - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__FREE, info, 0, 0, 0); - my_assert(res == 0); - - /* Minimally observe the attributes supplied. */ - if (attr__detachstate != PTHREAD_CREATE_DETACHED - && attr__detachstate != PTHREAD_CREATE_JOINABLE) - pthread_error("thread_wrapper: invalid attr->__detachstate"); - if (attr__detachstate == PTHREAD_CREATE_DETACHED) - pthread_detach(pthread_self()); - - /* The root function might not return. But if it does we simply - move along to thread_exit_wrapper. All other ways out for the - thread (cancellation, or calling pthread_exit) lead there - too. */ - ret_val = root_fn(arg); - thread_exit_wrapper(ret_val); - /* NOTREACHED */ -} - - -/* --------------------------------------------------- - THREADs - ------------------------------------------------ */ - -__attribute__((weak)) -int pthread_yield ( void ) -{ - int res; - ensure_valgrind("pthread_yield"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_YIELD, 0, 0, 0, 0); - return 0; -} - - -int pthread_equal(pthread_t thread1, pthread_t thread2) -{ - return thread1 == thread2 ? 1 : 0; -} - - -/* Bundle up the args into a malloc'd block and create a new thread - consisting of thread_wrapper() applied to said malloc'd block. 
*/ -int -pthread_create (pthread_t *__restrict __thredd, - __const pthread_attr_t *__restrict __attr, - void *(*__start_routine) (void *), - void *__restrict __arg) -{ - int tid_child; - NewThreadInfo* info; - - ensure_valgrind("pthread_create"); - - /* Allocate space for the arg block. thread_wrapper will free - it. */ - VALGRIND_MAGIC_SEQUENCE(info, NULL /* default */, - VG_USERREQ__MALLOC, - sizeof(NewThreadInfo), 0, 0, 0); - my_assert(info != NULL); - - if (__attr) - info->attr__detachstate = __attr->__detachstate; - else - info->attr__detachstate = PTHREAD_CREATE_JOINABLE; - - info->root_fn = __start_routine; - info->arg = __arg; - VALGRIND_MAGIC_SEQUENCE(tid_child, VG_INVALID_THREADID /* default */, - VG_USERREQ__APPLY_IN_NEW_THREAD, - &thread_wrapper, info, 0, 0); - my_assert(tid_child != VG_INVALID_THREADID); - - if (__thredd) - *__thredd = tid_child; - return 0; /* success */ -} - - -int -pthread_join (pthread_t __th, void **__thread_return) -{ - int res; - ensure_valgrind("pthread_join"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_JOIN, - __th, __thread_return, 0, 0); - return res; -} - - -void pthread_exit(void *retval) -{ - ensure_valgrind("pthread_exit"); - /* Simple! */ - thread_exit_wrapper(retval); -} - - -pthread_t pthread_self(void) -{ - int tid; - ensure_valgrind("pthread_self"); - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - if (tid < 1 || tid >= VG_N_THREADS) - barf("pthread_self: invalid ThreadId"); - return tid; -} - - -int pthread_detach(pthread_t th) -{ - int res; - ensure_valgrind("pthread_detach"); - /* First we enquire as to the current detach state. 
*/
-   VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */,
-                           VG_USERREQ__SET_OR_GET_DETACH,
-                           2 /* get */, th, 0, 0);
-   if (res == -1) {
-      /* not found */
-      pthread_error("pthread_detach: "
-                    "invalid target thread");
-      return ESRCH;
-   }
-   if (res == 1) {
-      /* already detached */
-      pthread_error("pthread_detach: "
-                    "target thread is already detached");
-      return EINVAL;
-   }
-   if (res == 0) {
-      VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */,
-                              VG_USERREQ__SET_OR_GET_DETACH,
-                              1 /* set */, th, 0, 0);
-      my_assert(res == 0);
-      return 0;
-   }
-   barf("pthread_detach");   /* reply was none of -1, 0, 1 */
-}
-
-
-/* ---------------------------------------------------
-   CLEANUP STACKS
-   ------------------------------------------------ */
-/* Hands the (__routine, __arg) pair to the scheduler with
-   VG_USERREQ__CLEANUP_PUSH and asserts that the request returned 0.
-   __buffer is not referenced here. */
-void _pthread_cleanup_push (struct _pthread_cleanup_buffer *__buffer,
-                            void (*__routine) (void *),
-                            void *__arg)
-{
-   int          res;
-   CleanupEntry cu;
-   ensure_valgrind("_pthread_cleanup_push");
-   cu.fn  = __routine;
-   cu.arg = __arg;
-   VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */,
-                           VG_USERREQ__CLEANUP_PUSH,
-                           &cu, 0, 0, 0);
-   my_assert(res == 0);
-}
-
-
-void _pthread_cleanup_push_defer (struct _pthread_cleanup_buffer *__buffer,
-                                  void (*__routine) (void *),
-                                  void *__arg)
-{
-   /* As _pthread_cleanup_push, but first save the thread's original
-      cancellation type in __buffer and set it to Deferred. */
-   int orig_ctype;
-   ensure_valgrind("_pthread_cleanup_push_defer");
-   /* Set to Deferred, and put the old cancellation type in
-      orig_ctype.  The asserts check that -1, used as the request's
-      default/failure value, cannot be mistaken for a real type, and
-      that the first int-sized word of *__buffer exists. */
-   my_assert(-1 != PTHREAD_CANCEL_DEFERRED);
-   my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS);
-   my_assert(sizeof(struct _pthread_cleanup_buffer) >= sizeof(int));
-   VALGRIND_MAGIC_SEQUENCE(orig_ctype, (-1) /* default */,
-                           VG_USERREQ__SET_CANCELTYPE,
-                           PTHREAD_CANCEL_DEFERRED, 0, 0, 0);
-   my_assert(orig_ctype != -1);
-   *((int*)(__buffer)) = orig_ctype;
-   /* Now push the cleanup.
*/ - _pthread_cleanup_push(NULL, __routine, __arg); -} - - -void _pthread_cleanup_pop (struct _pthread_cleanup_buffer *__buffer, - int __execute) -{ - int res; - CleanupEntry cu; - ensure_valgrind("_pthread_cleanup_push"); - cu.fn = cu.arg = NULL; /* paranoia */ - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__CLEANUP_POP, - &cu, 0, 0, 0); - if (res == 0) { - /* pop succeeded */ - if (__execute) { - cu.fn ( cu.arg ); - } - return; - } - if (res == -1) { - /* stack underflow */ - return; - } - barf("_pthread_cleanup_pop"); -} - - -void _pthread_cleanup_pop_restore (struct _pthread_cleanup_buffer *__buffer, - int __execute) -{ - int orig_ctype, fake_ctype; - /* As _pthread_cleanup_pop, but after popping/running the handler, - restore the thread's original cancellation type from the first - word of __buffer. */ - _pthread_cleanup_pop(NULL, __execute); - orig_ctype = *((int*)(__buffer)); - my_assert(orig_ctype == PTHREAD_CANCEL_DEFERRED - || orig_ctype == PTHREAD_CANCEL_ASYNCHRONOUS); - my_assert(-1 != PTHREAD_CANCEL_DEFERRED); - my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS); - my_assert(sizeof(struct _pthread_cleanup_buffer) >= sizeof(int)); - VALGRIND_MAGIC_SEQUENCE(fake_ctype, (-1) /* default */, - VG_USERREQ__SET_CANCELTYPE, - orig_ctype, 0, 0, 0); - my_assert(fake_ctype == PTHREAD_CANCEL_DEFERRED); -} - - -/* --------------------------------------------------- - MUTEX ATTRIBUTES - ------------------------------------------------ */ - -int __pthread_mutexattr_init(pthread_mutexattr_t *attr) -{ - attr->__mutexkind = PTHREAD_MUTEX_ERRORCHECK_NP; - return 0; -} - -int __pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) -{ - switch (type) { -# ifndef GLIBC_2_1 - case PTHREAD_MUTEX_TIMED_NP: - case PTHREAD_MUTEX_ADAPTIVE_NP: -# endif -# ifdef GLIBC_2_1 - case PTHREAD_MUTEX_FAST_NP: -# endif - case PTHREAD_MUTEX_RECURSIVE_NP: - case PTHREAD_MUTEX_ERRORCHECK_NP: - attr->__mutexkind = type; - return 0; - default: - 
pthread_error("pthread_mutexattr_settype: "
-                       "invalid type");
-         return EINVAL;
-   }
-}
-
-int __pthread_mutexattr_destroy(pthread_mutexattr_t *attr)
-{
-   return 0;
-}
-
-
-/* ---------------------------------------------------
-   MUTEXes
-   ------------------------------------------------ */
-/* Sets count 0, owner VG_INVALID_THREADID and kind
-   PTHREAD_MUTEX_ERRORCHECK_NP; a non-NULL mutexattr then overrides
-   the kind with its __mutexkind. */
-int __pthread_mutex_init(pthread_mutex_t *mutex,
-                         const  pthread_mutexattr_t *mutexattr)
-{
-   mutex->__m_count = 0;
-   mutex->__m_owner = (_pthread_descr)VG_INVALID_THREADID;
-   mutex->__m_kind  = PTHREAD_MUTEX_ERRORCHECK_NP;
-   if (mutexattr)
-      mutex->__m_kind = mutexattr->__mutexkind;
-   return 0;
-}
-
-/* When RUNNING_ON_VALGRIND, forwards the lock request to the
-   scheduler and returns its result.  Otherwise reports success
-   without doing anything, calling not_inside() on the first N_MOANS
-   calls only.  The trylock and unlock entry points below follow the
-   same pattern, each with its own moans counter. */
-int __pthread_mutex_lock(pthread_mutex_t *mutex)
-{
-   int res;
-   static int moans = N_MOANS;
-   if (RUNNING_ON_VALGRIND) {
-      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                              VG_USERREQ__PTHREAD_MUTEX_LOCK,
-                              mutex, 0, 0, 0);
-      return res;
-   } else {
-      if (moans-- > 0)
-         not_inside("pthread_mutex_lock");
-      return 0; /* success */
-   }
-}
-
-
-int __pthread_mutex_trylock(pthread_mutex_t *mutex)
-{
-   int res;
-   static int moans = N_MOANS;
-   if (RUNNING_ON_VALGRIND) {
-      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                              VG_USERREQ__PTHREAD_MUTEX_TRYLOCK,
-                              mutex, 0, 0, 0);
-      return res;
-   } else {
-      if (moans-- > 0)
-         not_inside("pthread_mutex_trylock");
-      return 0;
-   }
-}
-
-
-int __pthread_mutex_unlock(pthread_mutex_t *mutex)
-{
-   int res;
-   static int moans = N_MOANS;
-   if (RUNNING_ON_VALGRIND) {
-      VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                              VG_USERREQ__PTHREAD_MUTEX_UNLOCK,
-                              mutex, 0, 0, 0);
-      return res;
-   } else {
-      if (moans-- > 0)
-         not_inside("pthread_mutex_unlock");
-      return 0;
-   }
-}
-
-
-int __pthread_mutex_destroy(pthread_mutex_t *mutex)
-{
-   /* Valgrind doesn't hold any resources on behalf of the mutex, so no
-      need to involve it.
*/
-   if (mutex->__m_count > 0) {
-       pthread_error("pthread_mutex_destroy: "
-                     "mutex is still in use");
-       return EBUSY;
-   }
-   mutex->__m_count = 0;   /* same three values __pthread_mutex_init sets when given no attributes */
-   mutex->__m_owner = (_pthread_descr)VG_INVALID_THREADID;
-   mutex->__m_kind  = PTHREAD_MUTEX_ERRORCHECK_NP;
-   return 0;
-}
-
-
-/* ---------------------------------------------------
-   CONDITION VARIABLES
-   ------------------------------------------------ */
-
-/* LinuxThreads supports no attributes for conditions.  Hence ... */
-
-int pthread_condattr_init(pthread_condattr_t *attr)
-{
-   return 0;
-}
-
-int pthread_condattr_destroy(pthread_condattr_t *attr)
-{
-   return 0;
-}
-/* cond_attr is not consulted; the only field initialised is
-   __c_waiting, which is set to VG_INVALID_THREADID. */
-int pthread_cond_init( pthread_cond_t *cond,
-                       const pthread_condattr_t *cond_attr)
-{
-   cond->__c_waiting = (_pthread_descr)VG_INVALID_THREADID;
-   return 0;
-}
-
-int pthread_cond_destroy(pthread_cond_t *cond)
-{
-   /* should check that no threads are waiting on this CV.  As it
-      stands nothing is checked or released: the function only calls
-      kludged() on its first N_MOANS invocations and returns 0. */
-   static int moans = N_MOANS;
-   if (moans-- > 0)
-      kludged("pthread_cond_destroy");
-   return 0;
-}
-
-/* ---------------------------------------------------
-   SCHEDULING
-   ------------------------------------------------ */
-
-/* This is completely bogus.
*/ -int pthread_getschedparam(pthread_t target_thread, - int *policy, - struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - kludged("pthread_getschedparam"); - if (policy) *policy = SCHED_OTHER; -# ifdef HAVE_SCHED_PRIORITY - if (param) param->sched_priority = 0; /* who knows */ -# else - if (param) param->__sched_priority = 0; /* who knows */ -# endif - return 0; -} - -int pthread_setschedparam(pthread_t target_thread, - int policy, - const struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_setschedparam"); - return 0; -} - -int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) -{ - int res; - ensure_valgrind("pthread_cond_wait"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_WAIT, - cond, mutex, 0, 0); - return res; -} - -int pthread_cond_timedwait ( pthread_cond_t *cond, - pthread_mutex_t *mutex, - const struct timespec *abstime ) -{ - int res; - unsigned int ms_now, ms_end; - struct timeval timeval_now; - unsigned long long int ull_ms_now_after_1970; - unsigned long long int ull_ms_end_after_1970; - - ensure_valgrind("pthread_cond_timedwait"); - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - res = gettimeofday(&timeval_now, NULL); - my_assert(res == 0); - - ull_ms_now_after_1970 - = 1000ULL * ((unsigned long long int)(timeval_now.tv_sec)) - + ((unsigned long long int)(timeval_now.tv_usec / 1000000)); - ull_ms_end_after_1970 - = 1000ULL * ((unsigned long long int)(abstime->tv_sec)) - + ((unsigned long long int)(abstime->tv_nsec / 1000000)); - if (ull_ms_end_after_1970 < ull_ms_now_after_1970) - ull_ms_end_after_1970 = ull_ms_now_after_1970; - ms_end - = ms_now + (unsigned int)(ull_ms_end_after_1970 - ull_ms_now_after_1970); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_TIMEDWAIT, - cond, mutex, ms_end, 0); - return 
res; -} - - -int pthread_cond_signal(pthread_cond_t *cond) -{ - int res; - ensure_valgrind("pthread_cond_signal"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_SIGNAL, - cond, 0, 0, 0); - return res; -} - -int pthread_cond_broadcast(pthread_cond_t *cond) -{ - int res; - ensure_valgrind("pthread_cond_broadcast"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_BROADCAST, - cond, 0, 0, 0); - return res; -} - - -/* --------------------------------------------------- - CANCELLATION - ------------------------------------------------ */ - -int pthread_setcancelstate(int state, int *oldstate) -{ - int res; - ensure_valgrind("pthread_setcancelstate"); - if (state != PTHREAD_CANCEL_ENABLE - && state != PTHREAD_CANCEL_DISABLE) { - pthread_error("pthread_setcancelstate: " - "invalid state"); - return EINVAL; - } - my_assert(-1 != PTHREAD_CANCEL_ENABLE); - my_assert(-1 != PTHREAD_CANCEL_DISABLE); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELSTATE, - state, 0, 0, 0); - my_assert(res != -1); - if (oldstate) - *oldstate = res; - return 0; -} - -int pthread_setcanceltype(int type, int *oldtype) -{ - int res; - ensure_valgrind("pthread_setcanceltype"); - if (type != PTHREAD_CANCEL_DEFERRED - && type != PTHREAD_CANCEL_ASYNCHRONOUS) { - pthread_error("pthread_setcanceltype: " - "invalid type"); - return EINVAL; - } - my_assert(-1 != PTHREAD_CANCEL_DEFERRED); - my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELTYPE, - type, 0, 0, 0); - my_assert(res != -1); - if (oldtype) - *oldtype = res; - return 0; -} - -int pthread_cancel(pthread_t thread) -{ - int res; - ensure_valgrind("pthread_cancel"); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELPEND, - thread, &thread_exit_wrapper, 0, 0); - my_assert(res != -1); - return res; -} - -static __inline__ -void __my_pthread_testcancel(void) -{ - int res; - 
VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__TESTCANCEL, - 0, 0, 0, 0); - my_assert(res == 0); -} - -void pthread_testcancel ( void ) -{ - __my_pthread_testcancel(); -} - - -/* Not really sure what this is for. I suspect for doing the POSIX - requirements for fork() and exec(). We do this internally anyway - whenever those syscalls are observed, so this could be superfluous, - but hey ... -*/ -void __pthread_kill_other_threads_np ( void ) -{ - int res; - ensure_valgrind("__pthread_kill_other_threads_np"); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__NUKE_OTHER_THREADS, - 0, 0, 0, 0); - my_assert(res == 0); -} - - -/* --------------------------------------------------- - SIGNALS - ------------------------------------------------ */ - -#include - -int pthread_sigmask(int how, const sigset_t *newmask, - sigset_t *oldmask) -{ - int res; - - /* A bit subtle, because the scheduler expects newmask and oldmask - to be vki_sigset_t* rather than sigset_t*, and the two are - different. Fortunately the first 64 bits of a sigset_t are - exactly a vki_sigset_t, so we just pass the pointers through - unmodified. Haaaack! - - Also mash the how value so that the SIG_ constants from glibc - constants to VKI_ constants, so that the former do not have to - be included into vg_scheduler.c. */ - - ensure_valgrind("pthread_sigmask"); - - switch (how) { - case SIG_SETMASK: how = VKI_SIG_SETMASK; break; - case SIG_BLOCK: how = VKI_SIG_BLOCK; break; - case SIG_UNBLOCK: how = VKI_SIG_UNBLOCK; break; - default: pthread_error("pthread_sigmask: invalid how"); - return EINVAL; - } - - /* Crude check */ - if (newmask == NULL) - return EFAULT; - - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_SIGMASK, - how, newmask, oldmask, 0); - - /* The scheduler tells us of any memory violations. */ - return res == 0 ? 
0 : EFAULT;
-}
-
-/* Forwards to the scheduler's VG_USERREQ__SIGWAIT and returns its
-   result. */
-int sigwait ( const sigset_t* set, int* sig )
-{
-   int res;
-   ensure_valgrind("sigwait");
-   /* As with pthread_sigmask we deliberately confuse sigset_t with
-      vki_ksigset_t. */
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__SIGWAIT,
-                           set, sig, 0, 0);
-   return res;
-}
-
-
-int pthread_kill(pthread_t thread, int signo)
-{
-   int res;
-   ensure_valgrind("pthread_kill");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_KILL,
-                           thread, signo, 0, 0);
-   return res;
-}
-
-
-/* Copied verbatim from Linuxthreads */
-/* Redefine raise() to send signal to calling thread only,
-   as per POSIX 1003.1c.  pthread_kill's non-zero result is moved
-   into errno and -1 is returned in that case. */
-int raise (int sig)
-{
-  int retcode = pthread_kill(pthread_self(), sig);
-  if (retcode == 0) {
-    return 0;
-  } else {
-    errno = retcode;
-    return -1;
-  }
-}
-
-/* Polls VG_USERREQ__GET_N_SIGS_RETURNED (presumably the number of
-   signal handlers that have returned -- confirm against the
-   scheduler), sleeping between polls, until the value differs from
-   the one read on entry.  It then sets errno to EINTR and returns
-   -1. */
-int pause ( void )
-{
-   unsigned int n_orig, n_now;
-   struct vki_timespec nanosleep_interval;
-   ensure_valgrind("pause");
-
-   /* This is surely a cancellation point. */
-   __my_pthread_testcancel();
-
-   VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */,
-                           VG_USERREQ__GET_N_SIGS_RETURNED,
-                           0, 0, 0, 0);
-   my_assert(n_orig != 0xFFFFFFFF);
-
-   while (1) {
-      VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */,
-                              VG_USERREQ__GET_N_SIGS_RETURNED,
-                              0, 0, 0, 0);
-      my_assert(n_now != 0xFFFFFFFF);
-      my_assert(n_now >= n_orig);
-      if (n_now != n_orig) break;
-
-      nanosleep_interval.tv_sec  = 0;
-      nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */
-      /* It's critical here that valgrind's nanosleep implementation
-         is nonblocking.
*/
-      (void)my_do_syscall2(__NR_nanosleep,
-                           (int)(&nanosleep_interval), (int)NULL);
-   }
-
-   * (__errno_location()) = EINTR;
-   return -1;
-}
-
-
-/* ---------------------------------------------------
-   THREAD-SPECIFICs
-   ------------------------------------------------ */
-
-int __pthread_key_create(pthread_key_t *key,
-                         void  (*destr_function)  (void *))
-{
-   int res;
-   ensure_valgrind("pthread_key_create");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_KEY_CREATE,
-                           key, destr_function, 0, 0);
-   return res;
-}
-/* Not implemented: the key is left as it is.  ignored() is called
-   on the first N_MOANS invocations and 0 is always returned. */
-int pthread_key_delete(pthread_key_t key)
-{
-   static int moans = N_MOANS;
-   if (moans-- > 0)
-      ignored("pthread_key_delete");
-   return 0;
-}
-
-int __pthread_setspecific(pthread_key_t key, const void *pointer)
-{
-   int res;
-   ensure_valgrind("pthread_setspecific");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_SETSPECIFIC,
-                           key, pointer, 0, 0);
-   return res;
-}
-
-void * __pthread_getspecific(pthread_key_t key)
-{
-   int res;
-   ensure_valgrind("pthread_getspecific");
-   VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */,
-                           VG_USERREQ__PTHREAD_GETSPECIFIC,
-                           key, 0 , 0, 0);
-   return (void*)res;   /* the value comes back as an int and is cast to a pointer */
-}
-
-
-/* ---------------------------------------------------
-   ONCEry
-   ------------------------------------------------ */
-
-static pthread_mutex_t once_masterlock = PTHREAD_MUTEX_INITIALIZER;
-
-/* Runs init_routine the first time *once_control is seen as 0,
-   setting it to 1 beforehand.  One lock, once_masterlock, covers
-   every once_control, and init_routine runs with it held; a failure
-   to take the lock is treated as init_routine having called back
-   into pthread_once.  Always returns 0. */
-int __pthread_once ( pthread_once_t *once_control,
-                     void (*init_routine) (void) )
-{
-   int res;
-   ensure_valgrind("pthread_once");
-
-   res = __pthread_mutex_lock(&once_masterlock);
-
-   if (res != 0) {
-      barf("pthread_once: Looks like your program's "
-           "init routine calls back to pthread_once() ?!");
-   }
-
-   if (*once_control == 0) {
-      *once_control = 1;
-      init_routine();
-   }
-
-   __pthread_mutex_unlock(&once_masterlock);
-
-   return 0;
-}
-
-
-/* ---------------------------------------------------
-   MISC
-   ------------------------------------------------ */
-
-static pthread_mutex_t pthread_atfork_lock
-   = 
PTHREAD_MUTEX_INITIALIZER; - -int __pthread_atfork ( void (*prepare)(void), - void (*parent)(void), - void (*child)(void) ) -{ - int n, res; - ForkHandlerEntry entry; - - ensure_valgrind("pthread_atfork"); - __pthread_mutex_lock(&pthread_atfork_lock); - - /* Fetch old counter */ - VALGRIND_MAGIC_SEQUENCE(n, -2 /* default */, - VG_USERREQ__GET_FHSTACK_USED, - 0, 0, 0, 0); - my_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK); - if (n == VG_N_FORKHANDLERSTACK-1) - barf("pthread_atfork: VG_N_FORKHANDLERSTACK is too low; " - "increase and recompile"); - - /* Add entry */ - entry.prepare = *prepare; - entry.parent = *parent; - entry.child = *child; - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_ENTRY, - n, &entry, 0, 0); - my_assert(res == 0); - - /* Bump counter */ - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_USED, - n+1, 0, 0, 0); - my_assert(res == 0); - - __pthread_mutex_unlock(&pthread_atfork_lock); - return 0; -} - - -__attribute__((weak)) -void __pthread_initialize ( void ) -{ - ensure_valgrind("__pthread_initialize"); -} - - -/* --------------------------------------------------- - LIBRARY-PRIVATE THREAD SPECIFIC STATE - ------------------------------------------------ */ - -#include -static int thread_specific_errno[VG_N_THREADS]; -static int thread_specific_h_errno[VG_N_THREADS]; -static struct __res_state - thread_specific_res_state[VG_N_THREADS]; - -int* __errno_location ( void ) -{ - int tid; - /* ensure_valgrind("__errno_location"); */ - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - /* 'cos I'm paranoid ... */ - if (tid < 1 || tid >= VG_N_THREADS) - barf("__errno_location: invalid ThreadId"); - return & thread_specific_errno[tid]; -} - -int* __h_errno_location ( void ) -{ - int tid; - /* ensure_valgrind("__h_errno_location"); */ - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - /* 'cos I'm paranoid ... 
*/
-   if (tid < 1 || tid >= VG_N_THREADS)
-      barf("__h_errno_location: invalid ThreadId");
-   return & thread_specific_h_errno[tid];
-}
-/* Returns the address of the calling thread's slot in
-   thread_specific_res_state. */
-struct __res_state* __res_state ( void )
-{
-   int tid;
-   /* ensure_valgrind("__res_state"); */
-   VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */,
-                           VG_USERREQ__PTHREAD_GET_THREADID,
-                           0, 0, 0, 0);
-   /* 'cos I'm paranoid ... */
-   if (tid < 1 || tid >= VG_N_THREADS)
-      barf("__res_state: invalid ThreadId");
-   return & thread_specific_res_state[tid];
-}
-
-
-/* ---------------------------------------------------
-   LIBC-PRIVATE SPECIFIC DATA
-   ------------------------------------------------ */
-
-/* Relies on assumption that initial private data is NULL.  This
-   should be fixed somehow. */
-
-/* The allowable keys (indices) (all 2 of them).
-   From sysdeps/pthread/bits/libc-tsd.h
-
-   N_LIBC_TSD_EXTRA_KEYS adds one slot beyond _LIBC_TSD_KEY_N to the
-   key table declared below.
-*/
-#define N_LIBC_TSD_EXTRA_KEYS 1
-
-enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0,
-                        _LIBC_TSD_KEY_DL_ERROR,
-                        _LIBC_TSD_KEY_N };
-
-/* Auto-initialising subsystem.  libc_specifics_inited is set
-   after initialisation.  libc_specifics_inited_mx guards it. */
-static int             libc_specifics_inited    = 0;
-static pthread_mutex_t libc_specifics_inited_mx = PTHREAD_MUTEX_INITIALIZER;
-
-/* These are the keys we must initialise the first time. */
-static pthread_key_t libc_specifics_keys[_LIBC_TSD_KEY_N
-                                         + N_LIBC_TSD_EXTRA_KEYS];
-
-/* Initialise the keys, if they are not already initialised.
*/
-static
-void init_libc_tsd_keys ( void )
-{
-   int res, i;
-   pthread_key_t k;
-
-   res = pthread_mutex_lock(&libc_specifics_inited_mx);
-   if (res != 0) barf("init_libc_tsd_keys: lock");
-
-   if (libc_specifics_inited == 0) {
-      /* printf("INIT libc specifics\n"); */
-      libc_specifics_inited = 1;   /* set before the keys are created, still under the lock */
-      for (i = 0; i < _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS; i++) {
-         res = pthread_key_create(&k, NULL);
-         if (res != 0) barf("init_libc_tsd_keys: create");
-         libc_specifics_keys[i] = k;
-      }
-   }
-
-   res = pthread_mutex_unlock(&libc_specifics_inited_mx);
-   if (res != 0) barf("init_libc_tsd_keys: unlock");
-}
-
-/* Stores pointer under the pthread key that corresponds to the libc
-   TSD index key.  Barfs on an out-of-range index or a failed
-   setspecific; indices at or above _LIBC_TSD_KEY_N are accepted but
-   reported on stderr for the first N_MOANS such calls.  Always
-   returns 0. */
-static int
-libc_internal_tsd_set ( enum __libc_tsd_key_t key,
-                        const void * pointer )
-{
-   int        res;
-   static int moans = N_MOANS;
-   /* printf("SET SET SET key %d ptr %p\n", key, pointer); */
-   if (key < _LIBC_TSD_KEY_MALLOC
-       || key >= _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS)
-      barf("libc_internal_tsd_set: invalid key");
-   if (key >= _LIBC_TSD_KEY_N && moans-- > 0)
-      fprintf(stderr,
-         "valgrind's libpthread.so: libc_internal_tsd_set: "
-         "dubious key %d\n", key);
-   init_libc_tsd_keys();
-   res = pthread_setspecific(libc_specifics_keys[key], pointer);
-   if (res != 0) barf("libc_internal_tsd_set: setspecific failed");
-   return 0;
-}
-
-static void *
-libc_internal_tsd_get ( enum __libc_tsd_key_t key )
-{
-   void*      v;
-   static int moans = N_MOANS;
-   /* printf("GET GET GET key %d\n", key); */
-   if (key < _LIBC_TSD_KEY_MALLOC
-       || key >= _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS)
-      barf("libc_internal_tsd_get: invalid key");
-   if (key >= _LIBC_TSD_KEY_N && moans-- > 0)
-      fprintf(stderr,
-         "valgrind's libpthread.so: libc_internal_tsd_get: "
-         "dubious key %d\n", key);
-   init_libc_tsd_keys();
-   v = pthread_getspecific(libc_specifics_keys[key]);
-   /* A NULL result is passed back to the caller rather than treated
-      as an error:
-      if (v == NULL) barf("libc_internal_tsd_get: getspecific failed"); */
-   return v;
-}
-
-
-
-/* Non-static function pointers initialised to the two accessors
-   above (presumably the hooks libc looks up -- confirm). */
-int (*__libc_internal_tsd_set)
-    (enum __libc_tsd_key_t key, const void * pointer)
-   = libc_internal_tsd_set;
-
-void* 
(*__libc_internal_tsd_get)
-      (enum __libc_tsd_key_t key)
-   = libc_internal_tsd_get;
-
-
-/* ---------------------------------------------------------------------
-   These are here (I think) because they are deemed cancellation
-   points by POSIX.  For the moment we'll simply pass the call along
-   to the corresponding thread-unaware (?) libc routine.
-
-   Unless noted otherwise, each wrapper tests for a pending
-   cancellation and then forwards its arguments unchanged to the
-   __libc_ routine declared just above it.
-   ------------------------------------------------------------------ */
-
-/* NOTE(review): the header names on these three #include lines were
-   missing.  The ones given here are reconstructed from the types
-   this section uses (jmp_buf/sigjmp_buf; ssize_t, mode_t, pid_t,
-   off_t; struct sockaddr, socklen_t) -- confirm against the upstream
-   file. */
-#include <setjmp.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-
-#ifdef GLIBC_2_1
-extern
-int __sigaction
-             (int signum,
-              const struct sigaction *act,
-              struct  sigaction *oldact);
-#else
-extern
-int __libc_sigaction
-             (int signum,
-              const struct sigaction *act,
-              struct  sigaction *oldact);
-#endif
-/* The libc routine forwarded to depends on GLIBC_2_1. */
-int sigaction(int signum,
-              const struct sigaction *act,
-              struct  sigaction *oldact)
-{
-   __my_pthread_testcancel();
-#  ifdef GLIBC_2_1
-   return __sigaction(signum, act, oldact);
-#  else
-   return __libc_sigaction(signum, act, oldact);
-#  endif
-}
-
-
-extern
-int  __libc_connect(int  sockfd,
-                    const  struct  sockaddr  *serv_addr,
-                    socklen_t addrlen);
-__attribute__((weak))
-int  connect(int  sockfd,
-             const  struct  sockaddr  *serv_addr,
-             socklen_t addrlen)
-{
-   __my_pthread_testcancel();
-   return __libc_connect(sockfd, serv_addr, addrlen);
-}
-
-
-extern
-int __libc_fcntl(int fd, int cmd, long arg);
-__attribute__((weak))
-int fcntl(int fd, int cmd, long arg)
-{
-   __my_pthread_testcancel();
-   return __libc_fcntl(fd, cmd, arg);
-}
-
-
-extern
-ssize_t __libc_write(int fd, const void *buf, size_t count);
-__attribute__((weak))
-ssize_t write(int fd, const void *buf, size_t count)
-{
-   __my_pthread_testcancel();
-   return __libc_write(fd, buf, count);
-}
-
-
-extern
-ssize_t __libc_read(int fd, void *buf, size_t count);
-__attribute__((weak))
-ssize_t read(int fd, void *buf, size_t count)
-{
-   __my_pthread_testcancel();
-   return __libc_read(fd, buf, count);
-}
-
-
-extern
-int __libc_open64(const char *pathname, int flags, mode_t mode);
-__attribute__((weak))
-int open64(const char *pathname, 
int flags, mode_t mode) -{ - __my_pthread_testcancel(); - return __libc_open64(pathname, flags, mode); -} - - -extern -int __libc_open(const char *pathname, int flags, mode_t mode); -__attribute__((weak)) -int open(const char *pathname, int flags, mode_t mode) -{ - __my_pthread_testcancel(); - return __libc_open(pathname, flags, mode); -} - - -extern -int __libc_close(int fd); -__attribute__((weak)) -int close(int fd) -{ - __my_pthread_testcancel(); - return __libc_close(fd); -} - - -extern -int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen); -__attribute__((weak)) -int accept(int s, struct sockaddr *addr, socklen_t *addrlen) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_accept(s, addr, addrlen); -} - - -extern -pid_t __libc_waitpid(pid_t pid, int *status, int options); -__attribute__((weak)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - __my_pthread_testcancel(); - return __libc_waitpid(pid, status, options); -} - - -extern -int __libc_nanosleep(const struct timespec *req, struct timespec *rem); -__attribute__((weak)) -int nanosleep(const struct timespec *req, struct timespec *rem) -{ - __my_pthread_testcancel(); - return __libc_nanosleep(req, rem); -} - - -extern -int __libc_fsync(int fd); -__attribute__((weak)) -int fsync(int fd) -{ - __my_pthread_testcancel(); - return __libc_fsync(fd); -} - - -extern -off_t __libc_lseek(int fildes, off_t offset, int whence); -__attribute__((weak)) -off_t lseek(int fildes, off_t offset, int whence) -{ - __my_pthread_testcancel(); - return __libc_lseek(fildes, offset, whence); -} - - -extern -__off64_t __libc_lseek64(int fildes, __off64_t offset, int whence); -__attribute__((weak)) -__off64_t lseek64(int fildes, __off64_t offset, int whence) -{ - __my_pthread_testcancel(); - return __libc_lseek64(fildes, offset, whence); -} - - -extern -ssize_t __libc_pread64 (int __fd, void *__buf, size_t __nbytes, - __off64_t __offset); 
-ssize_t __pread64 (int __fd, void *__buf, size_t __nbytes, - __off64_t __offset) -{ - __my_pthread_testcancel(); - return __libc_pread64(__fd, __buf, __nbytes, __offset); -} - - -extern -ssize_t __libc_pwrite64 (int __fd, const void *__buf, size_t __nbytes, - __off64_t __offset); -ssize_t __pwrite64 (int __fd, const void *__buf, size_t __nbytes, - __off64_t __offset) -{ - __my_pthread_testcancel(); - return __libc_pwrite64(__fd, __buf, __nbytes, __offset); -} - - -extern -ssize_t __libc_pwrite(int fd, const void *buf, size_t count, off_t offset); -__attribute__((weak)) -ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) -{ - __my_pthread_testcancel(); - return __libc_pwrite(fd, buf, count, offset); -} - - -extern -ssize_t __libc_pread(int fd, void *buf, size_t count, off_t offset); -__attribute__((weak)) -ssize_t pread(int fd, void *buf, size_t count, off_t offset) -{ - __my_pthread_testcancel(); - return __libc_pread(fd, buf, count, offset); -} - - -extern -void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn)); -/* not weak: __attribute__((weak)) */ -void longjmp(jmp_buf env, int val) -{ - __libc_longjmp(env, val); -} - - -extern void __libc_siglongjmp (sigjmp_buf env, int val) - __attribute__ ((noreturn)); -void siglongjmp(sigjmp_buf env, int val) -{ - kludged("siglongjmp (cleanup handlers are ignored)"); - __libc_siglongjmp(env, val); -} - - -extern -int __libc_send(int s, const void *msg, size_t len, int flags); -__attribute__((weak)) -int send(int s, const void *msg, size_t len, int flags) -{ - __my_pthread_testcancel(); - return __libc_send(s, msg, len, flags); -} - - -extern -int __libc_recv(int s, void *buf, size_t len, int flags); -__attribute__((weak)) -int recv(int s, void *buf, size_t len, int flags) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_recv(s, buf, len, flags); -} - - -extern -int __libc_sendmsg(int s, const struct msghdr *msg, int flags); 
-__attribute__((weak)) -int sendmsg(int s, const struct msghdr *msg, int flags) -{ - __my_pthread_testcancel(); - return __libc_sendmsg(s, msg, flags); -} - - -extern -int __libc_recvmsg(int s, struct msghdr *msg, int flags); -__attribute__((weak)) -int recvmsg(int s, struct msghdr *msg, int flags) -{ - __my_pthread_testcancel(); - return __libc_recvmsg(s, msg, flags); -} - - -extern -int __libc_recvfrom(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen); -__attribute__((weak)) -int recvfrom(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_recvfrom(s, buf, len, flags, from, fromlen); -} - - -extern -int __libc_sendto(int s, const void *msg, size_t len, int flags, - const struct sockaddr *to, socklen_t tolen); -__attribute__((weak)) -int sendto(int s, const void *msg, size_t len, int flags, - const struct sockaddr *to, socklen_t tolen) -{ - __my_pthread_testcancel(); - return __libc_sendto(s, msg, len, flags, to, tolen); -} - - -extern -int __libc_system(const char* str); -__attribute__((weak)) -int system(const char* str) -{ - __my_pthread_testcancel(); - return __libc_system(str); -} - - -extern -pid_t __libc_wait(int *status); -__attribute__((weak)) -pid_t wait(int *status) -{ - __my_pthread_testcancel(); - return __libc_wait(status); -} - - -extern -int __libc_msync(const void *start, size_t length, int flags); -__attribute__((weak)) -int msync(const void *start, size_t length, int flags) -{ - __my_pthread_testcancel(); - return __libc_msync(start, length, flags); -} - - -/*--- fork and its helper ---*/ - -static -void run_fork_handlers ( int what ) -{ - ForkHandlerEntry entry; - int n_h, n_handlers, i, res; - - my_assert(what == 0 || what == 1 || what == 2); - - /* Fetch old counter */ - VALGRIND_MAGIC_SEQUENCE(n_handlers, -2 /* default */, - VG_USERREQ__GET_FHSTACK_USED, 
- 0, 0, 0, 0); - my_assert(n_handlers >= 0 && n_handlers < VG_N_FORKHANDLERSTACK); - - /* Prepare handlers (what == 0) are called in opposite order of - calls to pthread_atfork. Parent and child handlers are called - in the same order as calls to pthread_atfork. */ - if (what == 0) - n_h = n_handlers - 1; - else - n_h = 0; - - for (i = 0; i < n_handlers; i++) { - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__GET_FHSTACK_ENTRY, - n_h, &entry, 0, 0); - my_assert(res == 0); - switch (what) { - case 0: if (entry.prepare) entry.prepare(); - n_h--; break; - case 1: if (entry.parent) entry.parent(); - n_h++; break; - case 2: if (entry.child) entry.child(); - n_h++; break; - default: barf("run_fork_handlers: invalid what"); - } - } - - if (what != 0 /* prepare */) { - /* Empty out the stack. */ - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_USED, - 0, 0, 0, 0); - my_assert(res == 0); - } -} - -extern -pid_t __libc_fork(void); -pid_t __fork(void) -{ - pid_t pid; - __my_pthread_testcancel(); - __pthread_mutex_lock(&pthread_atfork_lock); - - run_fork_handlers(0 /* prepare */); - pid = __libc_fork(); - if (pid == 0) { - /* I am the child */ - run_fork_handlers(2 /* child */); - __pthread_mutex_init(&pthread_atfork_lock, NULL); - } else { - /* I am the parent */ - run_fork_handlers(1 /* parent */); - __pthread_mutex_unlock(&pthread_atfork_lock); - } - return pid; -} - - - - -/* --------------------------------------------------------------------- - Nonblocking implementations of select() and poll(). This stuff will - surely rot your mind. 
- ------------------------------------------------------------------ */ - -/*--------------------------------------------------*/ - -#include "vg_kerneliface.h" - -static -__inline__ -int is_kerror ( int res ) -{ - if (res >= -4095 && res <= -1) - return 1; - else - return 0; -} - - -static -int my_do_syscall1 ( int syscallno, int arg1 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "d" (arg1) ); - return __res; -} - -static -int my_do_syscall2 ( int syscallno, - int arg1, int arg2 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "d" (arg1), - "c" (arg2) ); - return __res; -} - -static -int my_do_syscall3 ( int syscallno, - int arg1, int arg2, int arg3 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%esi,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "S" (arg1), - "c" (arg2), - "d" (arg3) ); - return __res; -} - -static -int do_syscall_select( int n, - vki_fd_set* readfds, - vki_fd_set* writefds, - vki_fd_set* exceptfds, - struct vki_timeval * timeout ) -{ - int res; - int args[5]; - args[0] = n; - args[1] = (int)readfds; - args[2] = (int)writefds; - args[3] = (int)exceptfds; - args[4] = (int)timeout; - res = my_do_syscall1(__NR_select, (int)(&(args[0])) ); - return res; -} - - -/* This is a wrapper round select(), which makes it thread-safe, - meaning that only this thread will block, rather than the entire - process. This wrapper in turn depends on nanosleep() not to block - the entire process, but I think (hope? suspect?) that POSIX - pthreads guarantees that to be the case. - - Basic idea is: modify the timeout parameter to select so that it - returns immediately. Poll like this until select returns non-zero, - indicating something interesting happened, or until our time is up. 
- Space out the polls with nanosleeps of say 20 milliseconds, which - is required to be nonblocking; this allows other threads to run. - - Assumes: - * (checked via my_assert) types fd_set and vki_fd_set are identical. - * (checked via my_assert) types timeval and vki_timeval are identical. - * (unchecked) libc error numbers (EINTR etc) are the negation of the - kernel's error numbers (VKI_EINTR etc). -*/ - -/* __attribute__((weak)) */ -int select ( int n, - fd_set *rfds, - fd_set *wfds, - fd_set *xfds, - struct timeval *timeout ) -{ - unsigned int ms_now, ms_end; - int res; - fd_set rfds_copy; - fd_set wfds_copy; - fd_set xfds_copy; - struct vki_timeval t_now; - struct vki_timeval zero_timeout; - struct vki_timespec nanosleep_interval; - - __my_pthread_testcancel(); - - /* gcc's complains about ms_end being used uninitialised -- classic - case it can't understand, where ms_end is both defined and used - only if timeout != NULL. Hence ... */ - ms_end = 0; - - /* We assume that the kernel and libc data layouts are identical - for the following types. These asserts provide a crude - check. */ - if (sizeof(fd_set) != sizeof(vki_fd_set) - || sizeof(struct timeval) != sizeof(struct vki_timeval)) - barf("valgrind's hacky non-blocking select(): data sizes error"); - - /* Detect the current time and simultaneously find out if we are - running on Valgrind. */ - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - - /* If a zero timeout specified, this call is harmless. Also go - this route if we're not running on Valgrind, for whatever - reason. 
*/ - if ( (timeout && timeout->tv_sec == 0 && timeout->tv_usec == 0) - || (ms_now == 0xFFFFFFFF) ) { - res = do_syscall_select( n, (vki_fd_set*)rfds, - (vki_fd_set*)wfds, - (vki_fd_set*)xfds, - (struct vki_timeval*)timeout); - if (is_kerror(res)) { - * (__errno_location()) = -res; - return -1; - } else { - return res; - } - } - - /* If a timeout was specified, set ms_end to be the end millisecond - counter [wallclock] time. */ - if (timeout) { - res = my_do_syscall2(__NR_gettimeofday, (int)&t_now, (int)NULL); - my_assert(res == 0); - ms_end = ms_now; - ms_end += (timeout->tv_usec / 1000); - ms_end += (timeout->tv_sec * 1000); - /* Stay sane ... */ - my_assert (ms_end >= ms_now); - } - - /* fprintf(stderr, "MY_SELECT: before loop\n"); */ - - /* Either timeout == NULL, meaning wait indefinitely, or timeout != - NULL, in which case ms_end holds the end time. */ - - while (1) { - - /* First, do a return-immediately select(). */ - - /* These could be trashed each time round the loop, so restore - them each time. */ - if (rfds) rfds_copy = *rfds; - if (wfds) wfds_copy = *wfds; - if (xfds) xfds_copy = *xfds; - - zero_timeout.tv_sec = zero_timeout.tv_usec = 0; - - res = do_syscall_select( n, - rfds ? (vki_fd_set*)(&rfds_copy) : NULL, - wfds ? (vki_fd_set*)(&wfds_copy) : NULL, - xfds ? (vki_fd_set*)(&xfds_copy) : NULL, - & zero_timeout ); - if (is_kerror(res)) { - /* Some kind of error (including EINTR). Set errno and - return. The sets are unspecified in this case. */ - * (__errno_location()) = -res; - return -1; - } - if (res > 0) { - /* one or more fds is ready. Copy out resulting sets and - return. */ - if (rfds) *rfds = rfds_copy; - if (wfds) *wfds = wfds_copy; - if (xfds) *xfds = xfds_copy; - return res; - } - - /* Nothing interesting happened, so we go to sleep for a - while. 
*/ - - /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */ - /* nanosleep and go round again */ - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. */ - res = my_do_syscall2(__NR_nanosleep, - (int)(&nanosleep_interval), (int)NULL); - if (res == -VKI_EINTR) { - /* The nanosleep was interrupted by a signal. So we do the - same. */ - * (__errno_location()) = EINTR; - return -1; - } - - /* Sleeping finished. If a finite timeout, check to see if it - has expired yet. */ - if (timeout) { - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - if (ms_now >= ms_end) { - /* timeout; nothing interesting happened. */ - if (rfds) FD_ZERO(rfds); - if (wfds) FD_ZERO(wfds); - if (xfds) FD_ZERO(xfds); - return 0; - } - } - - } -} - - - - -#include - -#ifndef HAVE_NFDS_T -typedef unsigned long int nfds_t; -#endif - - -/* __attribute__((weak)) */ -int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) -{ - unsigned int ms_now, ms_end; - int res, i; - struct vki_timespec nanosleep_interval; - - __my_pthread_testcancel(); - ensure_valgrind("poll"); - - /* Detect the current time and simultaneously find out if we are - running on Valgrind. */ - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - - if (/* CHECK SIZES FOR struct pollfd */ - sizeof(struct timeval) != sizeof(struct vki_timeval)) - barf("valgrind's hacky non-blocking poll(): data sizes error"); - - /* dummy initialisation to keep gcc -Wall happy */ - ms_end = 0; - - /* If a zero timeout specified, this call is harmless. Also do - this if not running on Valgrind. 
*/ - if (__timeout == 0 || ms_now == 0xFFFFFFFF) { - res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, __timeout); - if (is_kerror(res)) { - * (__errno_location()) = -res; - return -1; - } else { - return res; - } - } - - /* If a timeout was specified, set ms_end to be the end wallclock - time. Easy considering that __timeout is in milliseconds. */ - if (__timeout > 0) { - ms_end = ms_now + (unsigned int)__timeout; - } - - /* fprintf(stderr, "MY_POLL: before loop\n"); */ - - /* Either timeout < 0, meaning wait indefinitely, or timeout > 0, - in which case t_end holds the end time. */ - - my_assert(__timeout != 0); - - while (1) { - - /* Do a return-immediately poll. */ - - res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, 0 ); - if (is_kerror(res)) { - /* Some kind of error. Set errno and return. */ - * (__errno_location()) = -res; - return -1; - } - if (res > 0) { - /* One or more fds is ready. Return now. */ - return res; - } - - /* Nothing interesting happened, so we go to sleep for a - while. */ - - /* fprintf(stderr, "MY_POLL: nanosleep\n"); */ - /* nanosleep and go round again */ - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. */ - (void)my_do_syscall2(__NR_nanosleep, - (int)(&nanosleep_interval), (int)NULL); - - /* Sleeping finished. If a finite timeout, check to see if it - has expired yet. */ - if (__timeout > 0) { - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - if (ms_now >= ms_end) { - /* timeout; nothing interesting happened. */ - for (i = 0; i < __nfds; i++) - __fds[i].revents = 0; - return 0; - } - } - - } -} - - -/* Helper function used to make accept() non-blocking. Idea is to use - the above nonblocking poll() to make this thread ONLY wait for the - specified fd to become ready, and then return. 
*/ - -/* Sigh -- a hack. We're not supposed to include this file directly; - should do it via /usr/include/fcntl.h, but that introduces a - varargs prototype for fcntl itself, which we can't mimic. */ -#define _FCNTL_H -#include - -static void wait_for_fd_to_be_readable_or_erring ( int fd ) -{ - struct pollfd pfd; - int res; - - /* fprintf(stderr, "wait_for_fd_to_be_readable_or_erring %d\n", fd); */ - - /* First check to see if the fd is nonblocking, and/or invalid. In - either case return immediately. */ - res = __libc_fcntl(fd, F_GETFL, 0); - if (res == -1) return; /* fd is invalid somehow */ - if (res & O_NONBLOCK) return; /* fd is nonblocking */ - - /* Ok, we'd better wait with poll. */ - pfd.fd = fd; - pfd.events = POLLIN | POLLPRI | POLLERR | POLLHUP | POLLNVAL; - /* ... but not POLLOUT, you may notice. */ - pfd.revents = 0; - (void)poll(&pfd, 1, -1 /* forever */); -} - - -/* --------------------------------------------------------------------- - Hacky implementation of semaphores. - ------------------------------------------------------------------ */ - -#include - -/* This is a terrible way to do the remapping. Plan is to import an - AVL tree at some point. */ - -typedef - struct { - pthread_mutex_t se_mx; - pthread_cond_t se_cv; - int count; - } - vg_sem_t; - -static pthread_mutex_t se_remap_mx = PTHREAD_MUTEX_INITIALIZER; - -static int se_remap_used = 0; -static sem_t* se_remap_orig[VG_N_SEMAPHORES]; -static vg_sem_t se_remap_new[VG_N_SEMAPHORES]; - -static vg_sem_t* se_remap ( sem_t* orig ) -{ - int res, i; - res = __pthread_mutex_lock(&se_remap_mx); - my_assert(res == 0); - - for (i = 0; i < se_remap_used; i++) { - if (se_remap_orig[i] == orig) - break; - } - if (i == se_remap_used) { - if (se_remap_used == VG_N_SEMAPHORES) { - res = pthread_mutex_unlock(&se_remap_mx); - my_assert(res == 0); - barf("VG_N_SEMAPHORES is too low. 
Increase and recompile."); - } - se_remap_used++; - se_remap_orig[i] = orig; - /* printf("allocated semaphore %d\n", i); */ - } - res = __pthread_mutex_unlock(&se_remap_mx); - my_assert(res == 0); - return &se_remap_new[i]; -} - - -int sem_init(sem_t *sem, int pshared, unsigned int value) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_init"); - if (pshared != 0) { - pthread_error("sem_init: unsupported pshared value"); - errno = ENOSYS; - return -1; - } - vg_sem = se_remap(sem); - res = pthread_mutex_init(&vg_sem->se_mx, NULL); - my_assert(res == 0); - res = pthread_cond_init(&vg_sem->se_cv, NULL); - my_assert(res == 0); - vg_sem->count = value; - return 0; -} - - -int sem_wait ( sem_t* sem ) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_wait"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - while (vg_sem->count == 0) { - res = pthread_cond_wait(&vg_sem->se_cv, &vg_sem->se_mx); - my_assert(res == 0); - } - vg_sem->count--; - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return 0; -} - -int sem_post ( sem_t* sem ) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_post"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - if (vg_sem->count == 0) { - vg_sem->count++; - res = pthread_cond_broadcast(&vg_sem->se_cv); - my_assert(res == 0); - } else { - vg_sem->count++; - } - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return 0; -} - - -int sem_trywait ( sem_t* sem ) -{ - int ret, res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_trywait"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - if (vg_sem->count > 0) { - vg_sem->count--; - ret = 0; - } else { - ret = -1; - errno = EAGAIN; - } - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return ret; -} - - -int sem_getvalue(sem_t* sem, int * sval) -{ - vg_sem_t* vg_sem; - 
ensure_valgrind("sem_trywait"); - vg_sem = se_remap(sem); - *sval = vg_sem->count; - return 0; -} - - -int sem_destroy(sem_t * sem) -{ - kludged("sem_destroy"); - /* if someone waiting on this semaphore, errno = EBUSY, return -1 */ - return 0; -} - - -/* --------------------------------------------------------------------- - Reader-writer locks. - ------------------------------------------------------------------ */ - -typedef - struct { - int initted; /* != 0 --> in use; sanity check only */ - int prefer_w; /* != 0 --> prefer writer */ - int nwait_r; /* # of waiting readers */ - int nwait_w; /* # of waiting writers */ - pthread_cond_t cv_r; /* for signalling readers */ - pthread_cond_t cv_w; /* for signalling writers */ - pthread_mutex_t mx; - int status; - /* allowed range for status: >= -1. -1 means 1 writer currently - active, >= 0 means N readers currently active. */ - } - vg_rwlock_t; - - -static pthread_mutex_t rw_remap_mx = PTHREAD_MUTEX_INITIALIZER; - -static int rw_remap_used = 0; -static pthread_rwlock_t* rw_remap_orig[VG_N_RWLOCKS]; -static vg_rwlock_t rw_remap_new[VG_N_RWLOCKS]; - - -static -void init_vg_rwlock ( vg_rwlock_t* vg_rwl ) -{ - int res = 0; - vg_rwl->initted = 1; - vg_rwl->prefer_w = 1; - vg_rwl->nwait_r = 0; - vg_rwl->nwait_w = 0; - vg_rwl->status = 0; - res = pthread_mutex_init(&vg_rwl->mx, NULL); - res |= pthread_cond_init(&vg_rwl->cv_r, NULL); - res |= pthread_cond_init(&vg_rwl->cv_w, NULL); - my_assert(res == 0); -} - - -/* Take the address of a LinuxThreads rwlock_t and return the shadow - address of our version. Further, if the LinuxThreads version - appears to have been statically initialised, do the same to the one - we allocate here. The pthread_rwlock_t.__rw_readers field is set - to zero by PTHREAD_RWLOCK_INITIALIZER, so we take zero as meaning - uninitialised and non-zero meaning initialised. 
-*/ -static vg_rwlock_t* rw_remap ( pthread_rwlock_t* orig ) -{ - int res, i; - vg_rwlock_t* vg_rwl; - res = __pthread_mutex_lock(&rw_remap_mx); - my_assert(res == 0); - - for (i = 0; i < rw_remap_used; i++) { - if (rw_remap_orig[i] == orig) - break; - } - if (i == rw_remap_used) { - if (rw_remap_used == VG_N_RWLOCKS) { - res = __pthread_mutex_unlock(&rw_remap_mx); - my_assert(res == 0); - barf("VG_N_RWLOCKS is too low. Increase and recompile."); - } - rw_remap_used++; - rw_remap_orig[i] = orig; - rw_remap_new[i].initted = 0; - if (0) printf("allocated rwlock %d\n", i); - } - res = __pthread_mutex_unlock(&rw_remap_mx); - my_assert(res == 0); - vg_rwl = &rw_remap_new[i]; - - /* Initialise the shadow, if required. */ - if (orig->__rw_readers == 0) { - orig->__rw_readers = 1; - init_vg_rwlock(vg_rwl); - if (orig->__rw_kind == PTHREAD_RWLOCK_PREFER_READER_NP) - vg_rwl->prefer_w = 0; - } - - return vg_rwl; -} - - -int pthread_rwlock_init ( pthread_rwlock_t* orig, - const pthread_rwlockattr_t* attr ) -{ - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_init\n"); - /* Force the remapper to initialise the shadow. */ - orig->__rw_readers = 0; - /* Install the lock preference; the remapper needs to know it. 
*/ - orig->__rw_kind = PTHREAD_RWLOCK_DEFAULT_NP; - if (attr) - orig->__rw_kind = attr->__lockkind; - rwl = rw_remap ( orig ); - return 0; -} - - -static -void pthread_rwlock_rdlock_CANCEL_HDLR ( void* rwl_v ) -{ - vg_rwlock_t* rwl = (vg_rwlock_t*)rwl_v; - rwl->nwait_r--; - pthread_mutex_unlock (&rwl->mx); -} - - -int pthread_rwlock_rdlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_rdlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status < 0) { - my_assert(rwl->status == -1); - rwl->nwait_r++; - pthread_cleanup_push( pthread_rwlock_rdlock_CANCEL_HDLR, rwl ); - while (1) { - if (rwl->status == 0) break; - res = pthread_cond_wait(&rwl->cv_r, &rwl->mx); - my_assert(res == 0); - } - pthread_cleanup_pop(0); - rwl->nwait_r--; - } - my_assert(rwl->status >= 0); - rwl->status++; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_tryrdlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_tryrdlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status == -1) { - /* Writer active; we have to give up. 
*/ - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - /* Success */ - my_assert(rwl->status >= 0); - rwl->status++; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -static -void pthread_rwlock_wrlock_CANCEL_HDLR ( void* rwl_v ) -{ - vg_rwlock_t* rwl = (vg_rwlock_t*)rwl_v; - rwl->nwait_w--; - pthread_mutex_unlock (&rwl->mx); -} - - -int pthread_rwlock_wrlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_wrlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0) { - rwl->nwait_w++; - pthread_cleanup_push( pthread_rwlock_wrlock_CANCEL_HDLR, rwl ); - while (1) { - if (rwl->status == 0) break; - res = pthread_cond_wait(&rwl->cv_w, &rwl->mx); - my_assert(res == 0); - } - pthread_cleanup_pop(0); - rwl->nwait_w--; - } - my_assert(rwl->status == 0); - rwl->status = -1; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_trywrlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_wrlock_trywrlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0) { - /* Reader(s) or a writer active; we have to give up. 
*/ - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - /* Success */ - my_assert(rwl->status == 0); - rwl->status = -1; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_unlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_unlock\n"); - rwl = rw_remap ( orig ); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status == 0) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EPERM; - } - my_assert(rwl->status != 0); - if (rwl->status == -1) { - rwl->status = 0; - } else { - my_assert(rwl->status > 0); - rwl->status--; - } - - my_assert(rwl->status >= 0); - - if (rwl->prefer_w) { - - /* Favour waiting writers, if any. */ - if (rwl->nwait_w > 0) { - /* Writer(s) are waiting. */ - if (rwl->status == 0) { - /* We can let a writer in. */ - res = pthread_cond_signal(&rwl->cv_w); - my_assert(res == 0); - } else { - /* There are still readers active. Do nothing; eventually - they will disappear, at which point a writer will be - admitted. */ - } - } - else - /* No waiting writers. */ - if (rwl->nwait_r > 0) { - /* Let in a waiting reader. */ - res = pthread_cond_signal(&rwl->cv_r); - my_assert(res == 0); - } - - } else { - - /* Favour waiting readers, if any. */ - if (rwl->nwait_r > 0) { - /* Reader(s) are waiting; let one in. */ - res = pthread_cond_signal(&rwl->cv_r); - my_assert(res == 0); - } - else - /* No waiting readers. */ - if (rwl->nwait_w > 0 && rwl->status == 0) { - /* We have waiting writers and no active readers; let a - writer in. 
*/ - res = pthread_cond_signal(&rwl->cv_w); - my_assert(res == 0); - } - } - - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_destroy ( pthread_rwlock_t *orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_destroy\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0 || rwl->nwait_r > 0 || rwl->nwait_w > 0) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - rwl->initted = 0; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -/* Copied directly from LinuxThreads. */ -int -pthread_rwlockattr_init (pthread_rwlockattr_t *attr) -{ - attr->__lockkind = 0; - attr->__pshared = PTHREAD_PROCESS_PRIVATE; - - return 0; -} - -/* Copied directly from LinuxThreads. */ -int -pthread_rwlockattr_setpshared (pthread_rwlockattr_t *attr, int pshared) -{ - if (pshared != PTHREAD_PROCESS_PRIVATE && pshared != PTHREAD_PROCESS_SHARED) - return EINVAL; - - /* For now it is not possible to shared a conditional variable. */ - if (pshared != PTHREAD_PROCESS_PRIVATE) - return ENOSYS; - - attr->__pshared = pshared; - - return 0; -} - - -/* --------------------------------------------------------------------- - B'stard. 
- ------------------------------------------------------------------ */ - -# define strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); - -# define weak_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); - -strong_alias(__pthread_mutex_lock, pthread_mutex_lock) -strong_alias(__pthread_mutex_trylock, pthread_mutex_trylock) -strong_alias(__pthread_mutex_unlock, pthread_mutex_unlock) -strong_alias(__pthread_mutexattr_init, pthread_mutexattr_init) - weak_alias(__pthread_mutexattr_settype, pthread_mutexattr_settype) -strong_alias(__pthread_mutex_init, pthread_mutex_init) -strong_alias(__pthread_mutexattr_destroy, pthread_mutexattr_destroy) -strong_alias(__pthread_mutex_destroy, pthread_mutex_destroy) -strong_alias(__pthread_once, pthread_once) -strong_alias(__pthread_atfork, pthread_atfork) -strong_alias(__pthread_key_create, pthread_key_create) -strong_alias(__pthread_getspecific, pthread_getspecific) -strong_alias(__pthread_setspecific, pthread_setspecific) - -#ifndef GLIBC_2_1 -strong_alias(sigaction, __sigaction) -#endif - -strong_alias(close, __close) -strong_alias(fcntl, __fcntl) -strong_alias(lseek, __lseek) -strong_alias(open, __open) -strong_alias(open64, __open64) -strong_alias(read, __read) -strong_alias(wait, __wait) -strong_alias(write, __write) -strong_alias(connect, __connect) -strong_alias(send, __send) - -weak_alias (__pread64, pread64) -weak_alias (__pwrite64, pwrite64) -weak_alias(__fork, fork) - -weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np) - -/*--------------------------------------------------*/ - -weak_alias(pthread_rwlock_rdlock, __pthread_rwlock_rdlock) -weak_alias(pthread_rwlock_unlock, __pthread_rwlock_unlock) -weak_alias(pthread_rwlock_wrlock, __pthread_rwlock_wrlock) - -weak_alias(pthread_rwlock_destroy, __pthread_rwlock_destroy) -weak_alias(pthread_rwlock_init, __pthread_rwlock_init) 
-weak_alias(pthread_rwlock_tryrdlock, __pthread_rwlock_tryrdlock) -weak_alias(pthread_rwlock_trywrlock, __pthread_rwlock_trywrlock) - - -/* I've no idea what these are, but they get called quite a lot. - Anybody know? */ - -#undef _IO_flockfile -void _IO_flockfile ( _IO_FILE * file ) -{ - pthread_mutex_lock(file->_lock); -} -weak_alias(_IO_flockfile, flockfile); - - -#undef _IO_funlockfile -void _IO_funlockfile ( _IO_FILE * file ) -{ - pthread_mutex_unlock(file->_lock); -} -weak_alias(_IO_funlockfile, funlockfile); - - -/* This doesn't seem to be needed to simulate libpthread.so's external - interface, but many people complain about its absence. */ - -strong_alias(__pthread_mutexattr_settype, __pthread_mutexattr_setkind_np) -weak_alias(__pthread_mutexattr_setkind_np, pthread_mutexattr_setkind_np) - - -/*--------------------------------------------------------------------*/ -/*--- end vg_libpthread.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/arch/x86-linux/vg_libpthread_unimp.c b/coregrind/arch/x86-linux/vg_libpthread_unimp.c deleted file mode 100644 index f413887f27..0000000000 --- a/coregrind/arch/x86-linux/vg_libpthread_unimp.c +++ /dev/null @@ -1,262 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Give dummy bindings for everything the real libpthread.so ---*/ -/*--- binds. vg_libpthread_unimp.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -/* --------------------------------------------------------------------- - ALL THIS CODE RUNS ON THE SIMULATED CPU. - Give a binding for everything the real libpthread.so binds. - ------------------------------------------------------------------ */ - -extern void vgPlain_unimp ( char* ); -#define unimp(str) vgPlain_unimp(str) - -//void _IO_flockfile ( void ) { unimp("_IO_flockfile"); } -void _IO_ftrylockfile ( void ) { unimp("_IO_ftrylockfile"); } -//void _IO_funlockfile ( void ) { unimp("_IO_funlockfile"); } -//void __close ( void ) { unimp("__close"); } -//void __connect ( void ) { unimp("__connect"); } -//void __errno_location ( void ) { unimp("__errno_location"); } -//void __fcntl ( void ) { unimp("__fcntl"); } -//void __fork ( void ) { unimp("__fork"); } -//void __h_errno_location ( void ) { unimp("__h_errno_location"); } -void __libc_allocate_rtsig ( void ) { unimp("__libc_allocate_rtsig"); } -void __libc_current_sigrtmax ( void ) { unimp("__libc_current_sigrtmax"); } -void __libc_current_sigrtmin ( void ) { unimp("__libc_current_sigrtmin"); } -//void __lseek ( void ) { unimp("__lseek"); } -//void __open ( void ) { unimp("__open"); } -//void __open64 ( void ) { unimp("__open64"); } -//void __pread64 ( void ) { unimp("__pread64"); } -//void __pthread_atfork ( void ) { unimp("__pthread_atfork"); } -//void __pthread_getspecific ( void ) { unimp("__pthread_getspecific"); } -//void __pthread_key_create ( void ) { unimp("__pthread_key_create"); 
} -//void __pthread_kill_other_threads_np ( void ) { unimp("__pthread_kill_other_threads_np"); } -//void __pthread_mutex_destroy ( void ) { unimp("__pthread_mutex_destroy"); } -//void __pthread_mutex_init ( void ) { unimp("__pthread_mutex_init"); } -//void __pthread_mutex_lock ( void ) { unimp("__pthread_mutex_lock"); } -//void __pthread_mutex_trylock ( void ) { unimp("__pthread_mutex_trylock"); } -//void __pthread_mutex_unlock ( void ) { unimp("__pthread_mutex_unlock"); } -//void __pthread_mutexattr_destroy ( void ) { unimp("__pthread_mutexattr_destroy"); } -//void __pthread_mutexattr_init ( void ) { unimp("__pthread_mutexattr_init"); } -//void __pthread_mutexattr_settype ( void ) { unimp("__pthread_mutexattr_settype"); } -//void __pthread_once ( void ) { unimp("__pthread_once"); } -//void __pthread_setspecific ( void ) { unimp("__pthread_setspecific"); } -//void __pwrite64 ( void ) { unimp("__pwrite64"); } -//void __read ( void ) { unimp("__read"); } -//void __res_state ( void ) { unimp("__res_state"); } -//void __send ( void ) { unimp("__send"); } -//void __sigaction ( void ) { unimp("__sigaction"); } -//--//void __vfork ( void ) { unimp("__vfork"); } -//void __wait ( void ) { unimp("__wait"); } -//void __write ( void ) { unimp("__write"); } -//void _pthread_cleanup_pop ( void ) { unimp("_pthread_cleanup_pop"); } -//void _pthread_cleanup_pop_restore ( void ) { unimp("_pthread_cleanup_pop_restore"); } -//void _pthread_cleanup_push ( void ) { unimp("_pthread_cleanup_push"); } -//void _pthread_cleanup_push_defer ( void ) { unimp("_pthread_cleanup_push_defer"); } -//void longjmp ( void ) { unimp("longjmp"); } -//void pthread_atfork ( void ) { unimp("pthread_atfork"); } -//void pthread_attr_destroy ( void ) { unimp("pthread_attr_destroy"); } -void pthread_attr_getdetachstate ( void ) { unimp("pthread_attr_getdetachstate"); } -void pthread_attr_getinheritsched ( void ) { unimp("pthread_attr_getinheritsched"); } -//void pthread_attr_getschedparam ( void ) { 
unimp("pthread_attr_getschedparam"); } -//void pthread_attr_getschedpolicy ( void ) { unimp("pthread_attr_getschedpolicy"); } -//void pthread_attr_getscope ( void ) { unimp("pthread_attr_getscope"); } - -//void pthread_attr_setdetachstate ( void ) { unimp("pthread_attr_setdetachstate"); } -//void pthread_attr_setinheritsched ( void ) { unimp("pthread_attr_setinheritsched"); } -//void pthread_attr_setschedparam ( void ) { unimp("pthread_attr_setschedparam"); } -//void pthread_attr_setschedpolicy ( void ) { unimp("pthread_attr_setschedpolicy"); } -//void pthread_attr_setscope ( void ) { unimp("pthread_attr_setscope"); } -void pthread_barrier_destroy ( void ) { unimp("pthread_barrier_destroy"); } -void pthread_barrier_init ( void ) { unimp("pthread_barrier_init"); } -void pthread_barrier_wait ( void ) { unimp("pthread_barrier_wait"); } -void pthread_barrierattr_destroy ( void ) { unimp("pthread_barrierattr_destroy"); } -void pthread_barrierattr_init ( void ) { unimp("pthread_barrierattr_init"); } -void pthread_barrierattr_setpshared ( void ) { unimp("pthread_barrierattr_setpshared"); } -//void pthread_cancel ( void ) { unimp("pthread_cancel"); } -//void pthread_cond_broadcast ( void ) { unimp("pthread_cond_broadcast"); } -//void pthread_cond_destroy ( void ) { unimp("pthread_cond_destroy"); } -//void pthread_cond_init ( void ) { unimp("pthread_cond_init"); } -//void pthread_cond_signal ( void ) { unimp("pthread_cond_signal"); } -//void pthread_cond_timedwait ( void ) { unimp("pthread_cond_timedwait"); } -//void pthread_cond_wait ( void ) { unimp("pthread_cond_wait"); } -//void pthread_condattr_destroy ( void ) { unimp("pthread_condattr_destroy"); } -void pthread_condattr_getpshared ( void ) { unimp("pthread_condattr_getpshared"); } -//void pthread_condattr_init ( void ) { unimp("pthread_condattr_init"); } -void pthread_condattr_setpshared ( void ) { unimp("pthread_condattr_setpshared"); } -//void pthread_detach ( void ) { unimp("pthread_detach"); } -//void 
pthread_equal ( void ) { unimp("pthread_equal"); } -//void pthread_exit ( void ) { unimp("pthread_exit"); } -//void pthread_getattr_np ( void ) { unimp("pthread_getattr_np"); } -void pthread_getcpuclockid ( void ) { unimp("pthread_getcpuclockid"); } -//void pthread_getschedparam ( void ) { unimp("pthread_getschedparam"); } -//void pthread_getspecific ( void ) { unimp("pthread_getspecific"); } -//void pthread_join ( void ) { unimp("pthread_join"); } -//void pthread_key_create ( void ) { unimp("pthread_key_create"); } -//void pthread_key_delete ( void ) { unimp("pthread_key_delete"); } -//void pthread_kill ( void ) { unimp("pthread_kill"); } -//void pthread_mutex_destroy ( void ) { unimp("pthread_mutex_destroy"); } -//void pthread_mutex_init ( void ) { unimp("pthread_mutex_init"); } -//void pthread_mutex_lock ( void ) { unimp("pthread_mutex_lock"); } -void pthread_mutex_timedlock ( void ) { unimp("pthread_mutex_timedlock"); } -//void pthread_mutex_trylock ( void ) { unimp("pthread_mutex_trylock"); } -//void pthread_mutex_unlock ( void ) { unimp("pthread_mutex_unlock"); } -//void pthread_mutexattr_destroy ( void ) { unimp("pthread_mutexattr_destroy"); } -//void pthread_mutexattr_init ( void ) { unimp("pthread_mutexattr_init"); } -//void pthread_once ( void ) { unimp("pthread_once"); } -//void pthread_rwlock_destroy ( void ) { unimp("pthread_rwlock_destroy"); } -//void pthread_rwlock_init ( void ) { unimp("pthread_rwlock_init"); } -//void pthread_rwlock_rdlock ( void ) { unimp("pthread_rwlock_rdlock"); } -void pthread_rwlock_timedrdlock ( void ) { unimp("pthread_rwlock_timedrdlock"); } -void pthread_rwlock_timedwrlock ( void ) { unimp("pthread_rwlock_timedwrlock"); } -//void pthread_rwlock_tryrdlock ( void ) { unimp("pthread_rwlock_tryrdlock"); } -//void pthread_rwlock_trywrlock ( void ) { unimp("pthread_rwlock_trywrlock"); } -//void pthread_rwlock_unlock ( void ) { unimp("pthread_rwlock_unlock"); } -//void pthread_rwlock_wrlock ( void ) { 
unimp("pthread_rwlock_wrlock"); } -void pthread_rwlockattr_destroy ( void ) { unimp("pthread_rwlockattr_destroy"); } -void pthread_rwlockattr_getkind_np ( void ) { unimp("pthread_rwlockattr_getkind_np"); } -void pthread_rwlockattr_getpshared ( void ) { unimp("pthread_rwlockattr_getpshared"); } -//void pthread_rwlockattr_init ( void ) { unimp("pthread_rwlockattr_init"); } -void pthread_rwlockattr_setkind_np ( void ) { unimp("pthread_rwlockattr_setkind_np"); } -//void pthread_rwlockattr_setpshared ( void ) { unimp("pthread_rwlockattr_setpshared"); } -//void pthread_self ( void ) { unimp("pthread_self"); } -//void pthread_setcancelstate ( void ) { unimp("pthread_setcancelstate"); } -//void pthread_setcanceltype ( void ) { unimp("pthread_setcanceltype"); } -//void pthread_setschedparam ( void ) { unimp("pthread_setschedparam"); } -//void pthread_setspecific ( void ) { unimp("pthread_setspecific"); } -//void pthread_sigmask ( void ) { unimp("pthread_sigmask"); } -//void pthread_testcancel ( void ) { unimp("pthread_testcancel"); } -//void raise ( void ) { unimp("raise"); } -void sem_close ( void ) { unimp("sem_close"); } -void sem_open ( void ) { unimp("sem_open"); } -void sem_timedwait ( void ) { unimp("sem_timedwait"); } -void sem_unlink ( void ) { unimp("sem_unlink"); } -//void sigaction ( void ) { unimp("sigaction"); } -//void siglongjmp ( void ) { unimp("siglongjmp"); } -//void sigwait ( void ) { unimp("sigwait"); } - -void __pthread_clock_gettime ( void ) { unimp("__pthread_clock_gettime"); } -void __pthread_clock_settime ( void ) { unimp("__pthread_clock_settime"); } - -#if 0 -void pthread_create@@GLIBC_2.1 ( void ) { unimp("pthread_create@@GLIBC_2.1"); } -void pthread_create@GLIBC_2.0 ( void ) { unimp("pthread_create@GLIBC_2.0"); } - -void sem_wait@@GLIBC_2.1 ( void ) { unimp("sem_wait@@GLIBC_2.1"); } -void sem_wait@GLIBC_2.0 ( void ) { unimp("sem_wait@GLIBC_2.0"); } - -void sem_trywait@@GLIBC_2.1 ( void ) { unimp("sem_trywait@@GLIBC_2.1"); } -void 
sem_trywait@GLIBC_2.0 ( void ) { unimp("sem_trywait@GLIBC_2.0"); } - -void sem_post@@GLIBC_2.1 ( void ) { unimp("sem_post@@GLIBC_2.1"); } -void sem_post@GLIBC_2.0 ( void ) { unimp("sem_post@GLIBC_2.0"); } - -void sem_destroy@@GLIBC_2.1 ( void ) { unimp("sem_destroy@@GLIBC_2.1"); } -void sem_destroy@GLIBC_2.0 ( void ) { unimp("sem_destroy@GLIBC_2.0"); } -void sem_getvalue@@GLIBC_2.1 ( void ) { unimp("sem_getvalue@@GLIBC_2.1"); } -void sem_getvalue@GLIBC_2.0 ( void ) { unimp("sem_getvalue@GLIBC_2.0"); } -void sem_init@@GLIBC_2.1 ( void ) { unimp("sem_init@@GLIBC_2.1"); } -void sem_init@GLIBC_2.0 ( void ) { unimp("sem_init@GLIBC_2.0"); } - -void pthread_attr_init@@GLIBC_2.1 ( void ) { unimp("pthread_attr_init@@GLIBC_2.1"); } -void pthread_attr_init@GLIBC_2.0 ( void ) { unimp("pthread_attr_init@GLIBC_2.0"); } -#endif - - - -# define strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); - -# define weak_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); - -//weak_alias(pthread_rwlock_destroy, __pthread_rwlock_destroy) -//weak_alias(pthread_rwlock_init, __pthread_rwlock_init) -//weak_alias(pthread_rwlock_tryrdlock, __pthread_rwlock_tryrdlock) -//weak_alias(pthread_rwlock_trywrlock, __pthread_rwlock_trywrlock) -//weak_alias(pthread_rwlock_wrlock, __pthread_rwlock_wrlock) -weak_alias(_IO_ftrylockfile, ftrylockfile) - -//__attribute__((weak)) void pread ( void ) { vgPlain_unimp("pread"); } -//__attribute__((weak)) void pwrite ( void ) { vgPlain_unimp("pwrite"); } -//__attribute__((weak)) void msync ( void ) { vgPlain_unimp("msync"); } -//__attribute__((weak)) void pause ( void ) { vgPlain_unimp("pause"); } -//__attribute__((weak)) void recvfrom ( void ) { vgPlain_unimp("recvfrom"); } -//__attribute__((weak)) void recvmsg ( void ) { vgPlain_unimp("recvmsg"); } -//__attribute__((weak)) void sendmsg ( void ) { vgPlain_unimp("sendmsg"); } -__attribute__((weak)) void tcdrain ( void ) { 
vgPlain_unimp("tcdrain"); } -//--//__attribute__((weak)) void vfork ( void ) { vgPlain_unimp("vfork"); } - -__attribute__((weak)) void pthread_attr_getguardsize ( void ) - { vgPlain_unimp("pthread_attr_getguardsize"); } -__attribute__((weak)) void pthread_attr_getstack ( void ) - { vgPlain_unimp("pthread_attr_getstack"); } -__attribute__((weak)) void pthread_attr_getstackaddr ( void ) - { vgPlain_unimp("pthread_attr_getstackaddr"); } -__attribute__((weak)) void pthread_attr_getstacksize ( void ) - { vgPlain_unimp("pthread_attr_getstacksize"); } -__attribute__((weak)) void pthread_attr_setguardsize ( void ) - { vgPlain_unimp("pthread_attr_setguardsize"); } -__attribute__((weak)) void pthread_attr_setstack ( void ) - { vgPlain_unimp("pthread_attr_setstack"); } -__attribute__((weak)) void pthread_attr_setstackaddr ( void ) - { vgPlain_unimp("pthread_attr_setstackaddr"); } -//__attribute__((weak)) void pthread_attr_setstacksize ( void ) -// { vgPlain_unimp("pthread_attr_setstacksize"); } -__attribute__((weak)) void pthread_getconcurrency ( void ) - { vgPlain_unimp("pthread_getconcurrency"); } -//__attribute__((weak)) void pthread_kill_other_threads_np ( void ) -// { vgPlain_unimp("pthread_kill_other_threads_np"); } -__attribute__((weak)) void pthread_mutexattr_getkind_np ( void ) - { vgPlain_unimp("pthread_mutexattr_getkind_np"); } -__attribute__((weak)) void pthread_mutexattr_getpshared ( void ) - { vgPlain_unimp("pthread_mutexattr_getpshared"); } -__attribute__((weak)) void pthread_mutexattr_gettype ( void ) - { vgPlain_unimp("pthread_mutexattr_gettype"); } -__attribute__((weak)) void pthread_mutexattr_setkind_np ( void ) - { vgPlain_unimp("pthread_mutexattr_setkind_np"); } -__attribute__((weak)) void pthread_mutexattr_setpshared ( void ) - { vgPlain_unimp("pthread_mutexattr_setpshared"); } -__attribute__((weak)) void pthread_setconcurrency ( void ) - { vgPlain_unimp("pthread_setconcurrency"); } -__attribute__((weak)) void pthread_spin_destroy ( void ) - { 
vgPlain_unimp("pthread_spin_destroy"); } -__attribute__((weak)) void pthread_spin_init ( void ) - { vgPlain_unimp("pthread_spin_init"); } -__attribute__((weak)) void pthread_spin_lock ( void ) - { vgPlain_unimp("pthread_spin_lock"); } -__attribute__((weak)) void pthread_spin_trylock ( void ) - { vgPlain_unimp("pthread_spin_trylock"); } -__attribute__((weak)) void pthread_spin_unlock ( void ) - { vgPlain_unimp("pthread_spin_unlock"); } - - -/*--------------------------------------------------------------------*/ -/*--- end vg_libpthread_unimp.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/arch/x86-linux/vg_syscall.S b/coregrind/arch/x86-linux/vg_syscall.S deleted file mode 100644 index adabbedbbe..0000000000 --- a/coregrind/arch/x86-linux/vg_syscall.S +++ /dev/null @@ -1,104 +0,0 @@ - -##--------------------------------------------------------------------## -##--- Support for doing system calls. ---## -##--- vg_syscall.S ---## -##--------------------------------------------------------------------## - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. 
- - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_constants.h" - - -.globl VG_(do_syscall) - -# NOTE that this routine expects the simulated machines state -# to be in m_state_static. Therefore it needs to be wrapped by -# code which copies from baseBlock before the call, into -# m_state_static, and back afterwards. - -VG_(do_syscall): - # Save all the int registers of the real machines state on the - # simulators stack. - pushal - - # and save the real FPU state too - fwait - fnsave VG_(real_fpu_state_saved_over_syscall) - frstor VG_(real_fpu_state_saved_over_syscall) - - # remember what the simulators stack pointer is - movl %esp, VG_(esp_saved_over_syscall) - - # Now copy the simulated machines state into the real one - # esp still refers to the simulators stack - frstor VG_(m_state_static)+40 - movl VG_(m_state_static)+32, %eax - pushl %eax - popfl - movl VG_(m_state_static)+0, %eax - movl VG_(m_state_static)+4, %ecx - movl VG_(m_state_static)+8, %edx - movl VG_(m_state_static)+12, %ebx - movl VG_(m_state_static)+16, %esp - movl VG_(m_state_static)+20, %ebp - movl VG_(m_state_static)+24, %esi - movl VG_(m_state_static)+28, %edi - - # esp now refers to the simulatees stack - # Do the actual system call - int $0x80 - - # restore stack as soon as possible - # esp refers to simulatees stack - movl %esp, VG_(m_state_static)+16 - movl VG_(esp_saved_over_syscall), %esp - # esp refers to simulators stack - - # ... and undo everything else. - # Copy real state back to simulated state. 
- movl %eax, VG_(m_state_static)+0 - movl %ecx, VG_(m_state_static)+4 - movl %edx, VG_(m_state_static)+8 - movl %ebx, VG_(m_state_static)+12 - movl %ebp, VG_(m_state_static)+20 - movl %esi, VG_(m_state_static)+24 - movl %edi, VG_(m_state_static)+28 - pushfl - popl %eax - movl %eax, VG_(m_state_static)+32 - fwait - fnsave VG_(m_state_static)+40 - frstor VG_(m_state_static)+40 - - # Restore the state of the simulator - frstor VG_(real_fpu_state_saved_over_syscall) - popal - - ret - -##--------------------------------------------------------------------## -##--- end vg_syscall.S ---## -##--------------------------------------------------------------------## diff --git a/coregrind/demangle/Makefile.am b/coregrind/demangle/Makefile.am deleted file mode 100644 index 554c75bdbc..0000000000 --- a/coregrind/demangle/Makefile.am +++ /dev/null @@ -1,25 +0,0 @@ -INCLUDES = -I$(top_srcdir) - -CFLAGS = $(WERROR) -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -noinst_HEADERS = \ - ansidecl.h \ - dyn-string.h \ - demangle.h \ - safe-ctype.h - -noinst_LIBRARIES = libdemangle.a - -libdemangle_a_SOURCES = \ - cp-demangle.c cplus-dem.c dyn-string.c safe-ctype.c - -# some files don't like my config.h, so just pretend it does not exist... - -cp-demangle.o: cp-demangle.c - $(COMPILE) -Wno-unused -Wno-shadow -c $< -UHAVE_CONFIG_H - -dyn-string.o: dyn-string.c - $(COMPILE) -c $< -UHAVE_CONFIG_H - -cplus-dem.o: cplus-dem.c - $(COMPILE) -Wno-unused -c $< diff --git a/coregrind/demangle/ansidecl.h b/coregrind/demangle/ansidecl.h deleted file mode 100644 index 9a7c5777ff..0000000000 --- a/coregrind/demangle/ansidecl.h +++ /dev/null @@ -1,295 +0,0 @@ -/* ANSI and traditional C compatability macros - Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001 - Free Software Foundation, Inc. - This file is part of the GNU C Library. 
- -This program is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2 of the License, or -(at your option) any later version. - -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - -/* ANSI and traditional C compatibility macros - - ANSI C is assumed if __STDC__ is #defined. - - Macro ANSI C definition Traditional C definition - ----- ---- - ---------- ----------- - ---------- - ANSI_PROTOTYPES 1 not defined - PTR `void *' `char *' - PTRCONST `void *const' `char *' - LONG_DOUBLE `long double' `double' - const not defined `' - volatile not defined `' - signed not defined `' - VA_START(ap, var) va_start(ap, var) va_start(ap) - - Note that it is safe to write "void foo();" indicating a function - with no return value, in all K+R compilers we have been able to test. - - For declaring functions with prototypes, we also provide these: - - PARAMS ((prototype)) - -- for functions which take a fixed number of arguments. Use this - when declaring the function. When defining the function, write a - K+R style argument list. For example: - - char *strcpy PARAMS ((char *dest, char *source)); - ... - char * - strcpy (dest, source) - char *dest; - char *source; - { ... } - - - VPARAMS ((prototype, ...)) - -- for functions which take a variable number of arguments. Use - PARAMS to declare the function, VPARAMS to define it. For example: - - int printf PARAMS ((const char *format, ...)); - ... - int - printf VPARAMS ((const char *format, ...)) - { - ... 
- } - - For writing functions which take variable numbers of arguments, we - also provide the VA_OPEN, VA_CLOSE, and VA_FIXEDARG macros. These - hide the differences between K+R and C89 more - thoroughly than the simple VA_START() macro mentioned above. - - VA_OPEN and VA_CLOSE are used *instead of* va_start and va_end. - Immediately after VA_OPEN, put a sequence of VA_FIXEDARG calls - corresponding to the list of fixed arguments. Then use va_arg - normally to get the variable arguments, or pass your va_list object - around. You do not declare the va_list yourself; VA_OPEN does it - for you. - - Here is a complete example: - - int - printf VPARAMS ((const char *format, ...)) - { - int result; - - VA_OPEN (ap, format); - VA_FIXEDARG (ap, const char *, format); - - result = vfprintf (stdout, format, ap); - VA_CLOSE (ap); - - return result; - } - - - You can declare variables either before or after the VA_OPEN, - VA_FIXEDARG sequence. Also, VA_OPEN and VA_CLOSE are the beginning - and end of a block. They must appear at the same nesting level, - and any variables declared after VA_OPEN go out of scope at - VA_CLOSE. Unfortunately, with a K+R compiler, that includes the - argument list. You can have multiple instances of VA_OPEN/VA_CLOSE - pairs in a single function in case you need to traverse the - argument list more than once. - - For ease of writing code which uses GCC extensions but needs to be - portable to other compilers, we provide the GCC_VERSION macro that - simplifies testing __GNUC__ and __GNUC_MINOR__ together, and various - wrappers around __attribute__. Also, __extension__ will be #defined - to nothing if it doesn't work. See below. - - This header also defines a lot of obsolete macros: - CONST, VOLATILE, SIGNED, PROTO, EXFUN, DEFUN, DEFUN_VOID, - AND, DOTS, NOARGS. Don't use them. */ - -#ifndef _ANSIDECL_H -#define _ANSIDECL_H 1 - -/* Every source file includes this file, - so they will all get the switch for lint. 
*/ -/* LINTLIBRARY */ - -/* Using MACRO(x,y) in cpp #if conditionals does not work with some - older preprocessors. Thus we can't define something like this: - -#define HAVE_GCC_VERSION(MAJOR, MINOR) \ - (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR))) - -and then test "#if HAVE_GCC_VERSION(2,7)". - -So instead we use the macro below and test it against specific values. */ - -/* This macro simplifies testing whether we are using gcc, and if it - is of a particular minimum version. (Both major & minor numbers are - significant.) This macro will evaluate to 0 if we are not using - gcc at all. */ -#ifndef GCC_VERSION -#define GCC_VERSION (__GNUC__ * 1000 + __GNUC_MINOR__) -#endif /* GCC_VERSION */ - -#if defined (__STDC__) || defined (_AIX) || (defined (__mips) && defined (_SYSTYPE_SVR4)) || defined(_WIN32) -/* All known AIX compilers implement these things (but don't always - define __STDC__). The RISC/OS MIPS compiler defines these things - in SVR4 mode, but does not define __STDC__. */ - -#define ANSI_PROTOTYPES 1 -#define PTR void * -#define PTRCONST void *const -#define LONG_DOUBLE long double - -#define PARAMS(ARGS) ARGS -#define VPARAMS(ARGS) ARGS -#define VA_START(VA_LIST, VAR) va_start(VA_LIST, VAR) - -/* variadic function helper macros */ -/* "struct Qdmy" swallows the semicolon after VA_OPEN/VA_FIXEDARG's - use without inhibiting further decls and without declaring an - actual variable. */ -#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP, VAR); { struct Qdmy -#define VA_CLOSE(AP) } va_end(AP); } -#define VA_FIXEDARG(AP, T, N) struct Qdmy - -#undef const -#undef volatile -#undef signed - -/* inline requires special treatment; it's in C99, and GCC >=2.7 supports - it too, but it's not in C89. 
*/ -#undef inline -#if __STDC_VERSION__ > 199901L -/* it's a keyword */ -#else -# if GCC_VERSION >= 2007 -# define inline __inline__ /* __inline__ prevents -pedantic warnings */ -# else -# define inline /* nothing */ -# endif -#endif - -/* These are obsolete. Do not use. */ -#ifndef IN_GCC -#define CONST const -#define VOLATILE volatile -#define SIGNED signed - -#define PROTO(type, name, arglist) type name arglist -#define EXFUN(name, proto) name proto -#define DEFUN(name, arglist, args) name(args) -#define DEFUN_VOID(name) name(void) -#define AND , -#define DOTS , ... -#define NOARGS void -#endif /* ! IN_GCC */ - -#else /* Not ANSI C. */ - -#undef ANSI_PROTOTYPES -#define PTR char * -#define PTRCONST PTR -#define LONG_DOUBLE double - -#define PARAMS(args) () -#define VPARAMS(args) (va_alist) va_dcl -#define VA_START(va_list, var) va_start(va_list) - -#define VA_OPEN(AP, VAR) { va_list AP; va_start(AP); { struct Qdmy -#define VA_CLOSE(AP) } va_end(AP); } -#define VA_FIXEDARG(AP, TYPE, NAME) TYPE NAME = va_arg(AP, TYPE) - -/* some systems define these in header files for non-ansi mode */ -#undef const -#undef volatile -#undef signed -#undef inline -#define const -#define volatile -#define signed -#define inline - -#ifndef IN_GCC -#define CONST -#define VOLATILE -#define SIGNED - -#define PROTO(type, name, arglist) type name () -#define EXFUN(name, proto) name() -#define DEFUN(name, arglist, args) name arglist args; -#define DEFUN_VOID(name) name() -#define AND ; -#define DOTS -#define NOARGS -#endif /* ! IN_GCC */ - -#endif /* ANSI C. */ - -/* Define macros for some gcc attributes. This permits us to use the - macros freely, and know that they will come into play for the - version of gcc in which they are supported. */ - -#if (GCC_VERSION < 2007) -# define __attribute__(x) -#endif - -/* Attribute __malloc__ on functions was valid as of gcc 2.96. 
*/ -#ifndef ATTRIBUTE_MALLOC -# if (GCC_VERSION >= 2096) -# define ATTRIBUTE_MALLOC __attribute__ ((__malloc__)) -# else -# define ATTRIBUTE_MALLOC -# endif /* GNUC >= 2.96 */ -#endif /* ATTRIBUTE_MALLOC */ - -/* Attributes on labels were valid as of gcc 2.93. */ -#ifndef ATTRIBUTE_UNUSED_LABEL -# if (GCC_VERSION >= 2093) -# define ATTRIBUTE_UNUSED_LABEL ATTRIBUTE_UNUSED -# else -# define ATTRIBUTE_UNUSED_LABEL -# endif /* GNUC >= 2.93 */ -#endif /* ATTRIBUTE_UNUSED_LABEL */ - -#ifndef ATTRIBUTE_UNUSED -#define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) -#endif /* ATTRIBUTE_UNUSED */ - -#ifndef ATTRIBUTE_NORETURN -#define ATTRIBUTE_NORETURN __attribute__ ((__noreturn__)) -#endif /* ATTRIBUTE_NORETURN */ - -#ifndef ATTRIBUTE_PRINTF -#define ATTRIBUTE_PRINTF(m, n) __attribute__ ((__format__ (__printf__, m, n))) -#define ATTRIBUTE_PRINTF_1 ATTRIBUTE_PRINTF(1, 2) -#define ATTRIBUTE_PRINTF_2 ATTRIBUTE_PRINTF(2, 3) -#define ATTRIBUTE_PRINTF_3 ATTRIBUTE_PRINTF(3, 4) -#define ATTRIBUTE_PRINTF_4 ATTRIBUTE_PRINTF(4, 5) -#define ATTRIBUTE_PRINTF_5 ATTRIBUTE_PRINTF(5, 6) -#endif /* ATTRIBUTE_PRINTF */ - -/* We use __extension__ in some places to suppress -pedantic warnings - about GCC extensions. This feature didn't work properly before - gcc 2.8. */ -#if GCC_VERSION < 2008 -#define __extension__ -#endif - -/* Bootstrap support: Adjust certain macros defined by Autoconf, - which are only valid for the stage1 compiler. If we detect - a modern version of GCC, we are probably in stage2 or beyond, - so unconditionally reset the values. Note that const, inline, - etc. have been dealt with above. */ -#if (GCC_VERSION >= 2007) -# ifndef HAVE_LONG_DOUBLE -# define HAVE_LONG_DOUBLE 1 -# endif -#endif /* GCC >= 2.7 */ - -#endif /* ansidecl.h */ diff --git a/coregrind/demangle/cp-demangle.c b/coregrind/demangle/cp-demangle.c deleted file mode 100644 index 5cf99c8c89..0000000000 --- a/coregrind/demangle/cp-demangle.c +++ /dev/null @@ -1,4174 +0,0 @@ -/* Demangler for IA64 / g++ V3 ABI. 
- Copyright (C) 2000, 2001 Free Software Foundation, Inc. - Written by Alex Samuel . - - This file is part of GNU CC. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. -*/ - -/* This file implements demangling of C++ names mangled according to - the IA64 / g++ V3 ABI. Use the cp_demangle function to - demangle a mangled name, or compile with the preprocessor macro - STANDALONE_DEMANGLER defined to create a demangling filter - executable (functionally similar to c++filt, but includes this - demangler only). */ - -#include - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_STDLIB_H -#include -#endif - -#ifdef HAVE_STRING_H -#include -#endif - -#include "vg_include.h" -#include "ansidecl.h" -#include "dyn-string.h" -#include "demangle.h" - -#ifndef STANDALONE -#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) -#define free(p) VG_(free)(VG_AR_DEMANGLE, p) -#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) -#endif - -/* If CP_DEMANGLE_DEBUG is defined, a trace of the grammar evaluation, - and other debugging output, will be generated. 
*/ -#ifdef CP_DEMANGLE_DEBUG -#define DEMANGLE_TRACE(PRODUCTION, DM) \ - fprintf (stderr, " -> %-24s at position %3d\n", \ - (PRODUCTION), current_position (DM)); -#else -#define DEMANGLE_TRACE(PRODUCTION, DM) -#endif - -/* Don't include , to prevent additional unresolved symbols - from being dragged into the C++ runtime library. */ -#define IS_DIGIT(CHAR) ((CHAR) >= '0' && (CHAR) <= '9') -#define IS_ALPHA(CHAR) \ - (((CHAR) >= 'a' && (CHAR) <= 'z') \ - || ((CHAR) >= 'A' && (CHAR) <= 'Z')) - -/* The prefix prepended by GCC to an identifier represnting the - anonymous namespace. */ -#define ANONYMOUS_NAMESPACE_PREFIX "_GLOBAL_" - -/* Character(s) to use for namespace separation in demangled output */ -#define NAMESPACE_SEPARATOR (dm->style == DMGL_JAVA ? "." : "::") - -/* If flag_verbose is zero, some simplifications will be made to the - output to make it easier to read and supress details that are - generally not of interest to the average C++ programmer. - Otherwise, the demangled representation will attempt to convey as - much information as the mangled form. */ -static int flag_verbose; - -/* If flag_strict is non-zero, demangle strictly according to the - specification -- don't demangle special g++ manglings. */ -static int flag_strict; - -/* String_list_t is an extended form of dyn_string_t which provides a - link field and a caret position for additions to the string. A - string_list_t may safely be cast to and used as a dyn_string_t. */ - -struct string_list_def -{ - /* The dyn_string; must be first. */ - struct dyn_string string; - - /* The position at which additional text is added to this string - (using the result_add* macros). This value is an offset from the - end of the string, not the beginning (and should be - non-positive). */ - int caret_position; - - /* The next string in the list. */ - struct string_list_def *next; -}; - -typedef struct string_list_def *string_list_t; - -/* Data structure representing a potential substitution. 
*/ - -struct substitution_def -{ - /* The demangled text of the substitution. */ - dyn_string_t text; - - /* Whether this substitution represents a template item. */ - int template_p : 1; -}; - -/* Data structure representing a template argument list. */ - -struct template_arg_list_def -{ - /* The next (lower) template argument list in the stack of currently - active template arguments. */ - struct template_arg_list_def *next; - - /* The first element in the list of template arguments in - left-to-right order. */ - string_list_t first_argument; - - /* The last element in the arguments lists. */ - string_list_t last_argument; -}; - -typedef struct template_arg_list_def *template_arg_list_t; - -/* Data structure to maintain the state of the current demangling. */ - -struct demangling_def -{ - /* The full mangled name being mangled. */ - const char *name; - - /* Pointer into name at the current position. */ - const char *next; - - /* Stack for strings containing demangled result generated so far. - Text is emitted to the topmost (first) string. */ - string_list_t result; - - /* The number of presently available substitutions. */ - int num_substitutions; - - /* The allocated size of the substitutions array. */ - int substitutions_allocated; - - /* An array of available substitutions. The number of elements in - the array is given by num_substitions, and the allocated array - size in substitutions_size. - - The most recent substition is at the end, so - - - `S_' corresponds to substititutions[num_substitutions - 1] - - `S0_' corresponds to substititutions[num_substitutions - 2] - - etc. */ - struct substitution_def *substitutions; - - /* The stack of template argument lists. */ - template_arg_list_t template_arg_lists; - - /* The most recently demangled source-name. */ - dyn_string_t last_source_name; - - /* Language style to use for demangled output. */ - int style; - - /* Set to non-zero iff this name is a constructor. 
The actual value - indicates what sort of constructor this is; see demangle.h. */ - enum gnu_v3_ctor_kinds is_constructor; - - /* Set to non-zero iff this name is a destructor. The actual value - indicates what sort of destructor this is; see demangle.h. */ - enum gnu_v3_dtor_kinds is_destructor; - -}; - -typedef struct demangling_def *demangling_t; - -/* This type is the standard return code from most functions. Values - other than STATUS_OK contain descriptive messages. */ -typedef const char *status_t; - -/* Special values that can be used as a status_t. */ -#define STATUS_OK NULL -#define STATUS_ERROR "Error." -#define STATUS_UNIMPLEMENTED "Unimplemented." -#define STATUS_INTERNAL_ERROR "Internal error." - -/* This status code indicates a failure in malloc or realloc. */ -static const char *const status_allocation_failed = "Allocation failed."; -#define STATUS_ALLOCATION_FAILED status_allocation_failed - -/* Non-zero if STATUS indicates that no error has occurred. */ -#define STATUS_NO_ERROR(STATUS) ((STATUS) == STATUS_OK) - -/* Evaluate EXPR, which must produce a status_t. If the status code - indicates an error, return from the current function with that - status code. 
*/ -#define RETURN_IF_ERROR(EXPR) \ - do \ - { \ - status_t s = EXPR; \ - if (!STATUS_NO_ERROR (s)) \ - return s; \ - } \ - while (0) - -static status_t int_to_dyn_string - PARAMS ((int, dyn_string_t)); -static string_list_t string_list_new - PARAMS ((int)); -static void string_list_delete - PARAMS ((string_list_t)); -static status_t result_add_separated_char - PARAMS ((demangling_t, int)); -static status_t result_push - PARAMS ((demangling_t)); -static string_list_t result_pop - PARAMS ((demangling_t)); -static int substitution_start - PARAMS ((demangling_t)); -static status_t substitution_add - PARAMS ((demangling_t, int, int)); -static dyn_string_t substitution_get - PARAMS ((demangling_t, int, int *)); -#ifdef CP_DEMANGLE_DEBUG -static void substitutions_print - PARAMS ((demangling_t, FILE *)); -#endif -static template_arg_list_t template_arg_list_new - PARAMS ((void)); -static void template_arg_list_delete - PARAMS ((template_arg_list_t)); -static void template_arg_list_add_arg - PARAMS ((template_arg_list_t, string_list_t)); -static string_list_t template_arg_list_get_arg - PARAMS ((template_arg_list_t, int)); -static void push_template_arg_list - PARAMS ((demangling_t, template_arg_list_t)); -static void pop_to_template_arg_list - PARAMS ((demangling_t, template_arg_list_t)); -#ifdef CP_DEMANGLE_DEBUG -static void template_arg_list_print - PARAMS ((template_arg_list_t, FILE *)); -#endif -static template_arg_list_t current_template_arg_list - PARAMS ((demangling_t)); -static demangling_t demangling_new - PARAMS ((const char *, int)); -static void demangling_delete - PARAMS ((demangling_t)); - -/* The last character of DS. Warning: DS is evaluated twice. */ -#define dyn_string_last_char(DS) \ - (dyn_string_buf (DS)[dyn_string_length (DS) - 1]) - -/* Append a space character (` ') to DS if it does not already end - with one. Evaluates to 1 on success, or 0 on allocation failure. 
*/ -#define dyn_string_append_space(DS) \ - ((dyn_string_length (DS) > 0 \ - && dyn_string_last_char (DS) != ' ') \ - ? dyn_string_append_char ((DS), ' ') \ - : 1) - -/* Returns the index of the current position in the mangled name. */ -#define current_position(DM) ((DM)->next - (DM)->name) - -/* Returns the character at the current position of the mangled name. */ -#define peek_char(DM) (*((DM)->next)) - -/* Returns the character one past the current position of the mangled - name. */ -#define peek_char_next(DM) \ - (peek_char (DM) == '\0' ? '\0' : (*((DM)->next + 1))) - -/* Returns the character at the current position, and advances the - current position to the next character. */ -#define next_char(DM) (*((DM)->next)++) - -/* Returns non-zero if the current position is the end of the mangled - name, i.e. one past the last character. */ -#define end_of_name_p(DM) (peek_char (DM) == '\0') - -/* Advances the current position by one character. */ -#define advance_char(DM) (++(DM)->next) - -/* Returns the string containing the current demangled result. */ -#define result_string(DM) (&(DM)->result->string) - -/* Returns the position at which new text is inserted into the - demangled result. */ -#define result_caret_pos(DM) \ - (result_length (DM) + \ - ((string_list_t) result_string (DM))->caret_position) - -/* Adds a dyn_string_t to the demangled result. */ -#define result_add_string(DM, STRING) \ - (dyn_string_insert (&(DM)->result->string, \ - result_caret_pos (DM), (STRING)) \ - ? STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* Adds NUL-terminated string CSTR to the demangled result. */ -#define result_add(DM, CSTR) \ - (dyn_string_insert_cstr (&(DM)->result->string, \ - result_caret_pos (DM), (CSTR)) \ - ? STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* Adds character CHAR to the demangled result. */ -#define result_add_char(DM, CHAR) \ - (dyn_string_insert_char (&(DM)->result->string, \ - result_caret_pos (DM), (CHAR)) \ - ? 
STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* Inserts a dyn_string_t to the demangled result at position POS. */ -#define result_insert_string(DM, POS, STRING) \ - (dyn_string_insert (&(DM)->result->string, (POS), (STRING)) \ - ? STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* Inserts NUL-terminated string CSTR to the demangled result at - position POS. */ -#define result_insert(DM, POS, CSTR) \ - (dyn_string_insert_cstr (&(DM)->result->string, (POS), (CSTR)) \ - ? STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* Inserts character CHAR to the demangled result at position POS. */ -#define result_insert_char(DM, POS, CHAR) \ - (dyn_string_insert_char (&(DM)->result->string, (POS), (CHAR)) \ - ? STATUS_OK : STATUS_ALLOCATION_FAILED) - -/* The length of the current demangled result. */ -#define result_length(DM) \ - dyn_string_length (&(DM)->result->string) - -/* Appends a (less-than, greater-than) character to the result in DM - to (open, close) a template argument or parameter list. Appends a - space first if necessary to prevent spurious elision of angle - brackets with the previous character. */ -#define result_open_template_list(DM) result_add_separated_char(DM, '<') -#define result_close_template_list(DM) result_add_separated_char(DM, '>') - -/* Appends a base 10 representation of VALUE to DS. STATUS_OK on - success. On failure, deletes DS and returns an error code. */ - -static status_t -int_to_dyn_string (value, ds) - int value; - dyn_string_t ds; -{ - int i; - int mask = 1; - - /* Handle zero up front. */ - if (value == 0) - { - if (!dyn_string_append_char (ds, '0')) - return STATUS_ALLOCATION_FAILED; - return STATUS_OK; - } - - /* For negative numbers, emit a minus sign. */ - if (value < 0) - { - if (!dyn_string_append_char (ds, '-')) - return STATUS_ALLOCATION_FAILED; - value = -value; - } - - /* Find the power of 10 of the first digit. */ - i = value; - while (i > 9) - { - mask *= 10; - i /= 10; - } - - /* Write the digits. 
*/ - while (mask > 0) - { - int digit = value / mask; - - if (!dyn_string_append_char (ds, '0' + digit)) - return STATUS_ALLOCATION_FAILED; - - value -= digit * mask; - mask /= 10; - } - - return STATUS_OK; -} - -/* Creates a new string list node. The contents of the string are - empty, but the initial buffer allocation is LENGTH. The string - list node should be deleted with string_list_delete. Returns NULL - if allocation fails. */ - -static string_list_t -string_list_new (length) - int length; -{ - string_list_t s = (string_list_t) malloc (sizeof (struct string_list_def)); - s->caret_position = 0; - if (s == NULL) - return NULL; - if (!dyn_string_init ((dyn_string_t) s, length)) - return NULL; - return s; -} - -/* Deletes the entire string list starting at NODE. */ - -static void -string_list_delete (node) - string_list_t node; -{ - while (node != NULL) - { - string_list_t next = node->next; - dyn_string_delete ((dyn_string_t) node); - node = next; - } -} - -/* Appends CHARACTER to the demangled result. If the current trailing - character of the result is CHARACTER, a space is inserted first. */ - -static status_t -result_add_separated_char (dm, character) - demangling_t dm; - int character; -{ - char *result = dyn_string_buf (result_string (dm)); - int caret_pos = result_caret_pos (dm); - - /* Add a space if the last character is already the character we - want to add. */ - if (caret_pos > 0 && result[caret_pos - 1] == character) - RETURN_IF_ERROR (result_add_char (dm, ' ')); - /* Add the character. */ - RETURN_IF_ERROR (result_add_char (dm, character)); - - return STATUS_OK; -} - -/* Allocates and pushes a new string onto the demangled results stack - for DM. Subsequent demangling with DM will emit to the new string. - Returns STATUS_OK on success, STATUS_ALLOCATION_FAILED on - allocation failure. */ - -static status_t -result_push (dm) - demangling_t dm; -{ - string_list_t new_string = string_list_new (0); - if (new_string == NULL) - /* Allocation failed. 
*/ - return STATUS_ALLOCATION_FAILED; - - /* Link the new string to the front of the list of result strings. */ - new_string->next = (string_list_t) dm->result; - dm->result = new_string; - return STATUS_OK; -} - -/* Removes and returns the topmost element on the demangled results - stack for DM. The caller assumes ownership for the returned - string. */ - -static string_list_t -result_pop (dm) - demangling_t dm; -{ - string_list_t top = dm->result; - dm->result = top->next; - return top; -} - -/* Returns the current value of the caret for the result string. The - value is an offet from the end of the result string. */ - -static int -result_get_caret (dm) - demangling_t dm; -{ - return ((string_list_t) result_string (dm))->caret_position; -} - -/* Sets the value of the caret for the result string, counted as an - offet from the end of the result string. */ - -static void -result_set_caret (dm, position) - demangling_t dm; - int position; -{ - ((string_list_t) result_string (dm))->caret_position = position; -} - -/* Shifts the position of the next addition to the result by - POSITION_OFFSET. A negative value shifts the caret to the left. */ - -static void -result_shift_caret (dm, position_offset) - demangling_t dm; - int position_offset; -{ - ((string_list_t) result_string (dm))->caret_position += position_offset; -} - -/* Returns non-zero if the character that comes right before the place - where text will be added to the result is a space. In this case, - the caller should supress adding another space. */ - -static int -result_previous_char_is_space (dm) - demangling_t dm; -{ - char *result = dyn_string_buf (result_string (dm)); - int pos = result_caret_pos (dm); - return pos > 0 && result[pos - 1] == ' '; -} - -/* Returns the start position of a fragment of the demangled result - that will be a substitution candidate. Should be called at the - start of productions that can add substitutions. 
*/ - -static int -substitution_start (dm) - demangling_t dm; -{ - return result_caret_pos (dm); -} - -/* Adds the suffix of the current demangled result of DM starting at - START_POSITION as a potential substitution. If TEMPLATE_P is - non-zero, this potential substitution is a template-id. */ - -static status_t -substitution_add (dm, start_position, template_p) - demangling_t dm; - int start_position; - int template_p; -{ - dyn_string_t result = result_string (dm); - dyn_string_t substitution = dyn_string_new (0); - int i; - - if (substitution == NULL) - return STATUS_ALLOCATION_FAILED; - - /* Extract the substring of the current demangling result that - represents the subsitution candidate. */ - if (!dyn_string_substring (substitution, - result, start_position, result_caret_pos (dm))) - { - dyn_string_delete (substitution); - return STATUS_ALLOCATION_FAILED; - } - - /* If there's no room for the new entry, grow the array. */ - if (dm->substitutions_allocated == dm->num_substitutions) - { - size_t new_array_size; - if (dm->substitutions_allocated > 0) - dm->substitutions_allocated *= 2; - else - dm->substitutions_allocated = 2; - new_array_size = - sizeof (struct substitution_def) * dm->substitutions_allocated; - - dm->substitutions = (struct substitution_def *) - realloc (dm->substitutions, new_array_size); - if (dm->substitutions == NULL) - /* Realloc failed. */ - { - dyn_string_delete (substitution); - return STATUS_ALLOCATION_FAILED; - } - } - - /* Add the substitution to the array. */ - i = dm->num_substitutions++; - dm->substitutions[i].text = substitution; - dm->substitutions[i].template_p = template_p; - -#ifdef CP_DEMANGLE_DEBUG - substitutions_print (dm, stderr); -#endif - - return STATUS_OK; -} - -/* Returns the Nth-most-recent substitution. Sets *TEMPLATE_P to - non-zero if the substitution is a template-id, zero otherwise. - N is numbered from zero. DM retains ownership of the returned - string. 
If N is negative, or equal to or greater than the current - number of substitution candidates, returns NULL. */ - -static dyn_string_t -substitution_get (dm, n, template_p) - demangling_t dm; - int n; - int *template_p; -{ - struct substitution_def *sub; - - /* Make sure N is in the valid range. */ - if (n < 0 || n >= dm->num_substitutions) - return NULL; - - sub = &(dm->substitutions[n]); - *template_p = sub->template_p; - return sub->text; -} - -#ifdef CP_DEMANGLE_DEBUG -/* Debugging routine to print the current substitutions to FP. */ - -static void -substitutions_print (dm, fp) - demangling_t dm; - FILE *fp; -{ - int seq_id; - int num = dm->num_substitutions; - - fprintf (fp, "SUBSTITUTIONS:\n"); - for (seq_id = -1; seq_id < num - 1; ++seq_id) - { - int template_p; - dyn_string_t text = substitution_get (dm, seq_id + 1, &template_p); - - if (seq_id == -1) - fprintf (fp, " S_ "); - else - fprintf (fp, " S%d_", seq_id); - fprintf (fp, " %c: %s\n", template_p ? '*' : ' ', dyn_string_buf (text)); - } -} - -#endif /* CP_DEMANGLE_DEBUG */ - -/* Creates a new template argument list. Returns NULL if allocation - fails. */ - -static template_arg_list_t -template_arg_list_new () -{ - template_arg_list_t new_list = - (template_arg_list_t) malloc (sizeof (struct template_arg_list_def)); - if (new_list == NULL) - return NULL; - /* Initialize the new list to have no arguments. */ - new_list->first_argument = NULL; - new_list->last_argument = NULL; - /* Return the new list. */ - return new_list; -} - -/* Deletes a template argument list and the template arguments it - contains. */ - -static void -template_arg_list_delete (list) - template_arg_list_t list; -{ - /* If there are any arguments on LIST, delete them. */ - if (list->first_argument != NULL) - string_list_delete (list->first_argument); - /* Delete LIST. */ - free (list); -} - -/* Adds ARG to the template argument list ARG_LIST. 
*/ - -static void -template_arg_list_add_arg (arg_list, arg) - template_arg_list_t arg_list; - string_list_t arg; -{ - if (arg_list->first_argument == NULL) - /* If there were no arguments before, ARG is the first one. */ - arg_list->first_argument = arg; - else - /* Make ARG the last argument on the list. */ - arg_list->last_argument->next = arg; - /* Make ARG the last on the list. */ - arg_list->last_argument = arg; - arg->next = NULL; -} - -/* Returns the template arugment at position INDEX in template - argument list ARG_LIST. */ - -static string_list_t -template_arg_list_get_arg (arg_list, index) - template_arg_list_t arg_list; - int index; -{ - string_list_t arg = arg_list->first_argument; - /* Scan down the list of arguments to find the one at position - INDEX. */ - while (index--) - { - arg = arg->next; - if (arg == NULL) - /* Ran out of arguments before INDEX hit zero. That's an - error. */ - return NULL; - } - /* Return the argument at position INDEX. */ - return arg; -} - -/* Pushes ARG_LIST onto the top of the template argument list stack. */ - -static void -push_template_arg_list (dm, arg_list) - demangling_t dm; - template_arg_list_t arg_list; -{ - arg_list->next = dm->template_arg_lists; - dm->template_arg_lists = arg_list; -#ifdef CP_DEMANGLE_DEBUG - fprintf (stderr, " ** pushing template arg list\n"); - template_arg_list_print (arg_list, stderr); -#endif -} - -/* Pops and deletes elements on the template argument list stack until - arg_list is the topmost element. If arg_list is NULL, all elements - are popped and deleted. */ - -static void -pop_to_template_arg_list (dm, arg_list) - demangling_t dm; - template_arg_list_t arg_list; -{ - while (dm->template_arg_lists != arg_list) - { - template_arg_list_t top = dm->template_arg_lists; - /* Disconnect the topmost element from the list. */ - dm->template_arg_lists = top->next; - /* Delete the popped element. 
*/ - template_arg_list_delete (top); -#ifdef CP_DEMANGLE_DEBUG - fprintf (stderr, " ** removing template arg list\n"); -#endif - } -} - -#ifdef CP_DEMANGLE_DEBUG - -/* Prints the contents of ARG_LIST to FP. */ - -static void -template_arg_list_print (arg_list, fp) - template_arg_list_t arg_list; - FILE *fp; -{ - string_list_t arg; - int index = -1; - - fprintf (fp, "TEMPLATE ARGUMENT LIST:\n"); - for (arg = arg_list->first_argument; arg != NULL; arg = arg->next) - { - if (index == -1) - fprintf (fp, " T_ : "); - else - fprintf (fp, " T%d_ : ", index); - ++index; - fprintf (fp, "%s\n", dyn_string_buf ((dyn_string_t) arg)); - } -} - -#endif /* CP_DEMANGLE_DEBUG */ - -/* Returns the topmost element on the stack of template argument - lists. If there is no list of template arguments, returns NULL. */ - -static template_arg_list_t -current_template_arg_list (dm) - demangling_t dm; -{ - return dm->template_arg_lists; -} - -/* Allocates a demangling_t object for demangling mangled NAME. A new - result must be pushed before the returned object can be used. - Returns NULL if allocation fails. */ - -static demangling_t -demangling_new (name, style) - const char *name; - int style; -{ - demangling_t dm; - dm = (demangling_t) malloc (sizeof (struct demangling_def)); - if (dm == NULL) - return NULL; - - dm->name = name; - dm->next = name; - dm->result = NULL; - dm->num_substitutions = 0; - dm->substitutions_allocated = 10; - dm->template_arg_lists = NULL; - dm->last_source_name = dyn_string_new (0); - if (dm->last_source_name == NULL) - return NULL; - dm->substitutions = (struct substitution_def *) - malloc (dm->substitutions_allocated * sizeof (struct substitution_def)); - if (dm->substitutions == NULL) - { - dyn_string_delete (dm->last_source_name); - return NULL; - } - dm->style = style; - dm->is_constructor = 0; - dm->is_destructor = 0; - - return dm; -} - -/* Deallocates a demangling_t object and all memory associated with - it. 
*/ - -static void -demangling_delete (dm) - demangling_t dm; -{ - int i; - template_arg_list_t arg_list = dm->template_arg_lists; - - /* Delete the stack of template argument lists. */ - while (arg_list != NULL) - { - template_arg_list_t next = arg_list->next; - template_arg_list_delete (arg_list); - arg_list = next; - } - /* Delete the list of substitutions. */ - for (i = dm->num_substitutions; --i >= 0; ) - dyn_string_delete (dm->substitutions[i].text); - free (dm->substitutions); - /* Delete the demangled result. */ - string_list_delete (dm->result); - /* Delete the stored identifier name. */ - dyn_string_delete (dm->last_source_name); - /* Delete the context object itself. */ - free (dm); -} - -/* These functions demangle an alternative of the corresponding - production in the mangling spec. The first argument of each is a - demangling context structure for the current demangling - operation. Most emit demangled text directly to the topmost result - string on the result string stack in the demangling context - structure. 
*/ - -static status_t demangle_char - PARAMS ((demangling_t, int)); -static status_t demangle_mangled_name - PARAMS ((demangling_t)); -static status_t demangle_encoding - PARAMS ((demangling_t)); -static status_t demangle_name - PARAMS ((demangling_t, int *)); -static status_t demangle_nested_name - PARAMS ((demangling_t, int *)); -static status_t demangle_prefix_v3 - PARAMS ((demangling_t, int *)); -static status_t demangle_unqualified_name - PARAMS ((demangling_t, int *)); -static status_t demangle_source_name - PARAMS ((demangling_t)); -static status_t demangle_number - PARAMS ((demangling_t, int *, int, int)); -static status_t demangle_number_literally - PARAMS ((demangling_t, dyn_string_t, int, int)); -static status_t demangle_identifier - PARAMS ((demangling_t, int, dyn_string_t)); -static status_t demangle_operator_name - PARAMS ((demangling_t, int, int *)); -static status_t demangle_nv_offset - PARAMS ((demangling_t)); -static status_t demangle_v_offset - PARAMS ((demangling_t)); -static status_t demangle_call_offset - PARAMS ((demangling_t)); -static status_t demangle_special_name - PARAMS ((demangling_t)); -static status_t demangle_ctor_dtor_name - PARAMS ((demangling_t)); -static status_t demangle_type_ptr - PARAMS ((demangling_t, int *, int)); -static status_t demangle_type - PARAMS ((demangling_t)); -static status_t demangle_CV_qualifiers - PARAMS ((demangling_t, dyn_string_t)); -static status_t demangle_builtin_type - PARAMS ((demangling_t)); -static status_t demangle_function_type - PARAMS ((demangling_t, int *)); -static status_t demangle_bare_function_type - PARAMS ((demangling_t, int *)); -static status_t demangle_class_enum_type - PARAMS ((demangling_t, int *)); -static status_t demangle_array_type - PARAMS ((demangling_t, int *)); -static status_t demangle_template_param - PARAMS ((demangling_t)); -static status_t demangle_template_args_1 - PARAMS ((demangling_t, template_arg_list_t)); -static status_t demangle_template_args - PARAMS 
((demangling_t)); -static status_t demangle_literal - PARAMS ((demangling_t)); -static status_t demangle_template_arg - PARAMS ((demangling_t)); -static status_t demangle_expression_v3 - PARAMS ((demangling_t)); -static status_t demangle_scope_expression - PARAMS ((demangling_t)); -static status_t demangle_expr_primary - PARAMS ((demangling_t)); -static status_t demangle_substitution - PARAMS ((demangling_t, int *)); -static status_t demangle_local_name - PARAMS ((demangling_t)); -static status_t demangle_discriminator - PARAMS ((demangling_t, int)); -static status_t cp_demangle - PARAMS ((const char *, dyn_string_t, int)); -#ifdef IN_LIBGCC2 -static status_t cp_demangle_type - PARAMS ((const char*, dyn_string_t)); -#endif - -/* When passed to demangle_bare_function_type, indicates that the - function's return type is not encoded before its parameter types. */ -#define BFT_NO_RETURN_TYPE NULL - -/* Check that the next character is C. If so, consume it. If not, - return an error. */ - -static status_t -demangle_char (dm, c) - demangling_t dm; - int c; -{ - static char *error_message = NULL; - - if (peek_char (dm) == c) - { - advance_char (dm); - return STATUS_OK; - } - else - { - vg_assert (0); - /* - if (error_message == NULL) - error_message = strdup ("Expected ?"); - error_message[9] = c; - return error_message; - */ - } -} - -/* Demangles and emits a . - - ::= _Z */ - -static status_t -demangle_mangled_name (dm) - demangling_t dm; -{ - DEMANGLE_TRACE ("mangled-name", dm); - RETURN_IF_ERROR (demangle_char (dm, '_')); - RETURN_IF_ERROR (demangle_char (dm, 'Z')); - RETURN_IF_ERROR (demangle_encoding (dm)); - return STATUS_OK; -} - -/* Demangles and emits an . - - ::= - ::= - ::= */ - -static status_t -demangle_encoding (dm) - demangling_t dm; -{ - int encode_return_type; - int start_position; - template_arg_list_t old_arg_list = current_template_arg_list (dm); - char peek = peek_char (dm); - - DEMANGLE_TRACE ("encoding", dm); - - /* Remember where the name starts. 
If it turns out to be a template - function, we'll have to insert the return type here. */ - start_position = result_caret_pos (dm); - - if (peek == 'G' || peek == 'T') - RETURN_IF_ERROR (demangle_special_name (dm)); - else - { - /* Now demangle the name. */ - RETURN_IF_ERROR (demangle_name (dm, &encode_return_type)); - - /* If there's anything left, the name was a function name, with - maybe its return type, and its parameter types, following. */ - if (!end_of_name_p (dm) - && peek_char (dm) != 'E') - { - if (encode_return_type) - /* Template functions have their return type encoded. The - return type should be inserted at start_position. */ - RETURN_IF_ERROR - (demangle_bare_function_type (dm, &start_position)); - else - /* Non-template functions don't have their return type - encoded. */ - RETURN_IF_ERROR - (demangle_bare_function_type (dm, BFT_NO_RETURN_TYPE)); - } - } - - /* Pop off template argument lists that were built during the - mangling of this name, to restore the old template context. */ - pop_to_template_arg_list (dm, old_arg_list); - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= - ::= - ::= - ::= - - ::= - ::= St # ::std:: - - - ::= - ::= */ - -static status_t -demangle_name (dm, encode_return_type) - demangling_t dm; - int *encode_return_type; -{ - int start = substitution_start (dm); - char peek = peek_char (dm); - int is_std_substitution = 0; - - /* Generally, the return type is encoded if the function is a - template-id, and suppressed otherwise. There are a few cases, - though, in which the return type is not encoded even for a - templated function. In these cases, this flag is set. */ - int suppress_return_type = 0; - - DEMANGLE_TRACE ("name", dm); - - switch (peek) - { - case 'N': - /* This is a . 
*/ - RETURN_IF_ERROR (demangle_nested_name (dm, encode_return_type)); - break; - - case 'Z': - RETURN_IF_ERROR (demangle_local_name (dm)); - *encode_return_type = 0; - break; - - case 'S': - /* The `St' substitution allows a name nested in std:: to appear - without being enclosed in a nested name. */ - if (peek_char_next (dm) == 't') - { - (void) next_char (dm); - (void) next_char (dm); - RETURN_IF_ERROR (result_add (dm, "std::")); - RETURN_IF_ERROR - (demangle_unqualified_name (dm, &suppress_return_type)); - is_std_substitution = 1; - } - else - RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); - /* Check if a template argument list immediately follows. - If so, then we just demangled an . */ - if (peek_char (dm) == 'I') - { - /* A template name of the form std:: is a - substitution candidate. */ - if (is_std_substitution) - RETURN_IF_ERROR (substitution_add (dm, start, 0)); - /* Demangle the here. */ - RETURN_IF_ERROR (demangle_template_args (dm)); - *encode_return_type = !suppress_return_type; - } - else - *encode_return_type = 0; - - break; - - default: - /* This is an or . */ - RETURN_IF_ERROR (demangle_unqualified_name (dm, &suppress_return_type)); - - /* If the is followed by template args, this - is an . */ - if (peek_char (dm) == 'I') - { - /* Add a substitution for the unqualified template name. */ - RETURN_IF_ERROR (substitution_add (dm, start, 0)); - - RETURN_IF_ERROR (demangle_template_args (dm)); - *encode_return_type = !suppress_return_type; - } - else - *encode_return_type = 0; - - break; - } - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= N [] E */ - -static status_t -demangle_nested_name (dm, encode_return_type) - demangling_t dm; - int *encode_return_type; -{ - char peek; - - DEMANGLE_TRACE ("nested-name", dm); - - RETURN_IF_ERROR (demangle_char (dm, 'N')); - - peek = peek_char (dm); - if (peek == 'r' || peek == 'V' || peek == 'K') - { - dyn_string_t cv_qualifiers; - status_t status; - - /* Snarf up CV qualifiers. 
*/ - cv_qualifiers = dyn_string_new (24); - if (cv_qualifiers == NULL) - return STATUS_ALLOCATION_FAILED; - demangle_CV_qualifiers (dm, cv_qualifiers); - - /* Emit them, preceded by a space. */ - status = result_add_char (dm, ' '); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, cv_qualifiers); - /* The CV qualifiers that occur in a will be - qualifiers for member functions. These are placed at the end - of the function. Therefore, shift the caret to the left by - the length of the qualifiers, so other text is inserted - before them and they stay at the end. */ - result_shift_caret (dm, -dyn_string_length (cv_qualifiers) - 1); - /* Clean up. */ - dyn_string_delete (cv_qualifiers); - RETURN_IF_ERROR (status); - } - - RETURN_IF_ERROR (demangle_prefix_v3 (dm, encode_return_type)); - /* No need to demangle the final ; demangle_prefix - will handle it. */ - RETURN_IF_ERROR (demangle_char (dm, 'E')); - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= - ::= - ::= # empty - ::= - - ::= - ::= */ - -static status_t -demangle_prefix_v3 (dm, encode_return_type) - demangling_t dm; - int *encode_return_type; -{ - int start = substitution_start (dm); - int nested = 0; - - /* ENCODE_RETURN_TYPE is updated as we decend the nesting chain. - After , it is set to non-zero; after everything - else it is set to zero. */ - - /* Generally, the return type is encoded if the function is a - template-id, and suppressed otherwise. There are a few cases, - though, in which the return type is not encoded even for a - templated function. In these cases, this flag is set. */ - int suppress_return_type = 0; - - DEMANGLE_TRACE ("prefix", dm); - - while (1) - { - char peek; - - if (end_of_name_p (dm)) - return "Unexpected end of name in ."; - - peek = peek_char (dm); - - /* We'll initialize suppress_return_type to false, and set it to true - if we end up demangling a constructor name. 
However, make - sure we're not actually about to demangle template arguments - -- if so, this is the following a - , so we'll want the previous flag value - around. */ - if (peek != 'I') - suppress_return_type = 0; - - if (IS_DIGIT ((unsigned char) peek) - || (peek >= 'a' && peek <= 'z') - || peek == 'C' || peek == 'D' - || peek == 'S') - { - /* We have another level of scope qualification. */ - if (nested) - RETURN_IF_ERROR (result_add (dm, NAMESPACE_SEPARATOR)); - else - nested = 1; - - if (peek == 'S') - /* The substitution determines whether this is a - template-id. */ - RETURN_IF_ERROR (demangle_substitution (dm, encode_return_type)); - else - { - /* It's just a name. */ - RETURN_IF_ERROR - (demangle_unqualified_name (dm, &suppress_return_type)); - *encode_return_type = 0; - } - } - else if (peek == 'Z') - RETURN_IF_ERROR (demangle_local_name (dm)); - else if (peek == 'I') - { - RETURN_IF_ERROR (demangle_template_args (dm)); - - /* Now we want to indicate to the caller that we've - demangled template arguments, thus the prefix was a - . That's so that the caller knows to - demangle the function's return type, if this turns out to - be a function name. But, if it's a member template - constructor or a templated conversion operator, report it - as untemplated. Those never get encoded return types. */ - *encode_return_type = !suppress_return_type; - } - else if (peek == 'E') - /* All done. */ - return STATUS_OK; - else - return "Unexpected character in ."; - - if (peek != 'S' - && peek_char (dm) != 'E') - /* Add a new substitution for the prefix thus far. */ - RETURN_IF_ERROR (substitution_add (dm, start, *encode_return_type)); - } -} - -/* Demangles and emits an . If this - is for a special function type that should never - have its return type encoded (particularly, a constructor or - conversion operator), *SUPPRESS_RETURN_TYPE is set to 1; otherwise, - it is set to zero. 
- - ::= - ::= - ::= */ - -static status_t -demangle_unqualified_name (dm, suppress_return_type) - demangling_t dm; - int *suppress_return_type; -{ - char peek = peek_char (dm); - - DEMANGLE_TRACE ("unqualified-name", dm); - - /* By default, don't force suppression of the return type (though - non-template functions still don't get a return type encoded). */ - *suppress_return_type = 0; - - if (IS_DIGIT ((unsigned char) peek)) - RETURN_IF_ERROR (demangle_source_name (dm)); - else if (peek >= 'a' && peek <= 'z') - { - int num_args; - - /* Conversion operators never have a return type encoded. */ - if (peek == 'c' && peek_char_next (dm) == 'v') - *suppress_return_type = 1; - - RETURN_IF_ERROR (demangle_operator_name (dm, 0, &num_args)); - } - else if (peek == 'C' || peek == 'D') - { - /* Constructors never have a return type encoded. */ - if (peek == 'C') - *suppress_return_type = 1; - - RETURN_IF_ERROR (demangle_ctor_dtor_name (dm)); - } - else - return "Unexpected character in ."; - - return STATUS_OK; -} - -/* Demangles and emits . - - ::= */ - -static status_t -demangle_source_name (dm) - demangling_t dm; -{ - int length; - - DEMANGLE_TRACE ("source-name", dm); - - /* Decode the length of the identifier. */ - RETURN_IF_ERROR (demangle_number (dm, &length, 10, 0)); - if (length == 0) - return "Zero length in ."; - - /* Now the identifier itself. It's placed into last_source_name, - where it can be used to build a constructor or destructor name. */ - RETURN_IF_ERROR (demangle_identifier (dm, length, - dm->last_source_name)); - - /* Emit it. */ - RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); - - return STATUS_OK; -} - -/* Demangles a number, either a or a at the - current position, consuming all consecutive digit characters. Sets - *VALUE to the resulting numberand returns STATUS_OK. The number is - interpreted as BASE, which must be either 10 or 36. If IS_SIGNED - is non-zero, negative numbers -- prefixed with `n' -- are accepted. 
- - ::= [n] - - ::= */ - -static status_t -demangle_number (dm, value, base, is_signed) - demangling_t dm; - int *value; - int base; - int is_signed; -{ - dyn_string_t number = dyn_string_new (10); - - DEMANGLE_TRACE ("number", dm); - - if (number == NULL) - return STATUS_ALLOCATION_FAILED; - - demangle_number_literally (dm, number, base, is_signed); - /* - *value = strtol (dyn_string_buf (number), NULL, base); - */ - /* vg_assert( base == 10 ); */ - if ( base != 10 && base != 36 ) { - dyn_string_delete(number); - return STATUS_UNIMPLEMENTED; - } - - if (base == 36) { - *value = VG_(atoll36) (dyn_string_buf (number)); - } else { - *value = VG_(atoll) (dyn_string_buf (number)); - } - dyn_string_delete (number); - - return STATUS_OK; -} - -/* Demangles a number at the current position. The digits (and minus - sign, if present) that make up the number are appended to STR. - Only base-BASE digits are accepted; BASE must be either 10 or 36. - If IS_SIGNED, negative numbers -- prefixed with `n' -- are - accepted. Does not consume a trailing underscore or other - terminating character. */ - -static status_t -demangle_number_literally (dm, str, base, is_signed) - demangling_t dm; - dyn_string_t str; - int base; - int is_signed; -{ - DEMANGLE_TRACE ("number*", dm); - - if (base != 10 && base != 36) - return STATUS_INTERNAL_ERROR; - - /* An `n' denotes a negative number. */ - if (is_signed && peek_char (dm) == 'n') - { - /* Skip past the n. */ - advance_char (dm); - /* The normal way to write a negative number is with a minus - sign. */ - if (!dyn_string_append_char (str, '-')) - return STATUS_ALLOCATION_FAILED; - } - - /* Loop until we hit a non-digit. */ - while (1) - { - char peek = peek_char (dm); - if (IS_DIGIT ((unsigned char) peek) - || (base == 36 && peek >= 'A' && peek <= 'Z')) - { - /* Accumulate digits. */ - if (!dyn_string_append_char (str, next_char (dm))) - return STATUS_ALLOCATION_FAILED; - } - else - /* Not a digit? All done. 
*/ - break; - } - - return STATUS_OK; -} - -/* Demangles an identifier at the current position of LENGTH - characters and places it in IDENTIFIER. */ - -static status_t -demangle_identifier (dm, length, identifier) - demangling_t dm; - int length; - dyn_string_t identifier; -{ - DEMANGLE_TRACE ("identifier", dm); - - dyn_string_clear (identifier); - if (!dyn_string_resize (identifier, length)) - return STATUS_ALLOCATION_FAILED; - - while (length-- > 0) - { - if (end_of_name_p (dm)) - return "Unexpected end of name in ."; - if (!dyn_string_append_char (identifier, next_char (dm))) - return STATUS_ALLOCATION_FAILED; - } - - /* GCC encodes anonymous namespaces using a `_GLOBAL_[_.$]N.' - followed by the source file name and some random characters. - Unless we're in strict mode, decipher these names appropriately. */ - if (!flag_strict) - { - char *name = dyn_string_buf (identifier); - int prefix_length = VG_(strlen) (ANONYMOUS_NAMESPACE_PREFIX); - - /* Compare the first, fixed part. */ - if (VG_(strncmp) (name, ANONYMOUS_NAMESPACE_PREFIX, prefix_length) == 0) - { - name += prefix_length; - /* The next character might be a period, an underscore, or - dollar sign, depending on the target architecture's - assembler's capabilities. After that comes an `N'. */ - if ((*name == '.' || *name == '_' || *name == '$') - && *(name + 1) == 'N') - /* This looks like the anonymous namespace identifier. - Replace it with something comprehensible. */ - dyn_string_copy_cstr (identifier, "(anonymous namespace)"); - } - } - - return STATUS_OK; -} - -/* Demangles and emits an . If SHORT_NAME is non-zero, - the short form is emitted; otherwise the full source form - (`operator +' etc.) is emitted. *NUM_ARGS is set to the number of - operands that the operator takes. 
- - - ::= nw # new - ::= na # new[] - ::= dl # delete - ::= da # delete[] - ::= ps # + (unary) - ::= ng # - (unary) - ::= ad # & (unary) - ::= de # * (unary) - ::= co # ~ - ::= pl # + - ::= mi # - - ::= ml # * - ::= dv # / - ::= rm # % - ::= an # & - ::= or # | - ::= eo # ^ - ::= aS # = - ::= pL # += - ::= mI # -= - ::= mL # *= - ::= dV # /= - ::= rM # %= - ::= aN # &= - ::= oR # |= - ::= eO # ^= - ::= ls # << - ::= rs # >> - ::= lS # <<= - ::= rS # >>= - ::= eq # == - ::= ne # != - ::= lt # < - ::= gt # > - ::= le # <= - ::= ge # >= - ::= nt # ! - ::= aa # && - ::= oo # || - ::= pp # ++ - ::= mm # -- - ::= cm # , - ::= pm # ->* - ::= pt # -> - ::= cl # () - ::= ix # [] - ::= qu # ? - ::= sz # sizeof - ::= cv # cast - ::= v [0-9] # vendor extended operator */ - -static status_t -demangle_operator_name (dm, short_name, num_args) - demangling_t dm; - int short_name; - int *num_args; -{ - struct operator_code - { - /* The mangled code for this operator. */ - const char *const code; - /* The source name of this operator. */ - const char *const name; - /* The number of arguments this operator takes. */ - const int num_args; - }; - - static const struct operator_code operators[] = - { - { "aN", "&=" , 2 }, - { "aS", "=" , 2 }, - { "aa", "&&" , 2 }, - { "ad", "&" , 1 }, - { "an", "&" , 2 }, - { "cl", "()" , 0 }, - { "cm", "," , 2 }, - { "co", "~" , 1 }, - { "dV", "/=" , 2 }, - { "da", " delete[]", 1 }, - { "de", "*" , 1 }, - { "dl", " delete" , 1 }, - { "dv", "/" , 2 }, - { "eO", "^=" , 2 }, - { "eo", "^" , 2 }, - { "eq", "==" , 2 }, - { "ge", ">=" , 2 }, - { "gt", ">" , 2 }, - { "ix", "[]" , 2 }, - { "lS", "<<=" , 2 }, - { "le", "<=" , 2 }, - { "ls", "<<" , 2 }, - { "lt", "<" , 2 }, - { "mI", "-=" , 2 }, - { "mL", "*=" , 2 }, - { "mi", "-" , 2 }, - { "ml", "*" , 2 }, - { "mm", "--" , 1 }, - { "na", " new[]" , 1 }, - { "ne", "!=" , 2 }, - { "ng", "-" , 1 }, - { "nt", "!" 
, 1 }, - { "nw", " new" , 1 }, - { "oR", "|=" , 2 }, - { "oo", "||" , 2 }, - { "or", "|" , 2 }, - { "pL", "+=" , 2 }, - { "pl", "+" , 2 }, - { "pm", "->*" , 2 }, - { "pp", "++" , 1 }, - { "ps", "+" , 1 }, - { "pt", "->" , 2 }, - { "qu", "?" , 3 }, - { "rM", "%=" , 2 }, - { "rS", ">>=" , 2 }, - { "rm", "%" , 2 }, - { "rs", ">>" , 2 }, - { "sz", " sizeof" , 1 } - }; - - const int num_operators = - sizeof (operators) / sizeof (struct operator_code); - - int c0 = next_char (dm); - int c1 = next_char (dm); - const struct operator_code* p1 = operators; - const struct operator_code* p2 = operators + num_operators; - - DEMANGLE_TRACE ("operator-name", dm); - - /* Is this a vendor-extended operator? */ - if (c0 == 'v' && IS_DIGIT (c1)) - { - RETURN_IF_ERROR (result_add (dm, "operator ")); - RETURN_IF_ERROR (demangle_source_name (dm)); - *num_args = 0; - return STATUS_OK; - } - - /* Is this a conversion operator? */ - if (c0 == 'c' && c1 == 'v') - { - RETURN_IF_ERROR (result_add (dm, "operator ")); - /* Demangle the converted-to type. */ - RETURN_IF_ERROR (demangle_type (dm)); - *num_args = 0; - return STATUS_OK; - } - - /* Perform a binary search for the operator code. */ - while (1) - { - const struct operator_code* p = p1 + (p2 - p1) / 2; - char match0 = p->code[0]; - char match1 = p->code[1]; - - if (c0 == match0 && c1 == match1) - /* Found it. */ - { - if (!short_name) - RETURN_IF_ERROR (result_add (dm, "operator")); - RETURN_IF_ERROR (result_add (dm, p->name)); - *num_args = p->num_args; - - return STATUS_OK; - } - - if (p == p1) - /* Couldn't find it. */ - return "Unknown code in ."; - - /* Try again. */ - if (c0 < match0 || (c0 == match0 && c1 < match1)) - p2 = p; - else - p1 = p; - } -} - -/* Demangles and omits an . - - ::= # non-virtual base override */ - -static status_t -demangle_nv_offset (dm) - demangling_t dm; -{ - dyn_string_t number; - status_t status = STATUS_OK; - - DEMANGLE_TRACE ("h-offset", dm); - - /* Demangle the offset. 
*/ - number = dyn_string_new (4); - if (number == NULL) - return STATUS_ALLOCATION_FAILED; - demangle_number_literally (dm, number, 10, 1); - - /* Don't display the offset unless in verbose mode. */ - if (flag_verbose) - { - status = result_add (dm, " [nv:"); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, number); - if (STATUS_NO_ERROR (status)) - status = result_add_char (dm, ']'); - } - - /* Clean up. */ - dyn_string_delete (number); - RETURN_IF_ERROR (status); - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= _ - # virtual base override, with vcall offset */ - -static status_t -demangle_v_offset (dm) - demangling_t dm; -{ - dyn_string_t number; - status_t status = STATUS_OK; - - DEMANGLE_TRACE ("v-offset", dm); - - /* Demangle the offset. */ - number = dyn_string_new (4); - if (number == NULL) - return STATUS_ALLOCATION_FAILED; - demangle_number_literally (dm, number, 10, 1); - - /* Don't display the offset unless in verbose mode. */ - if (flag_verbose) - { - status = result_add (dm, " [v:"); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, number); - if (STATUS_NO_ERROR (status)) - result_add_char (dm, ','); - } - dyn_string_delete (number); - RETURN_IF_ERROR (status); - - /* Demangle the separator. */ - RETURN_IF_ERROR (demangle_char (dm, '_')); - - /* Demangle the vcall offset. */ - number = dyn_string_new (4); - if (number == NULL) - return STATUS_ALLOCATION_FAILED; - demangle_number_literally (dm, number, 10, 1); - - /* Don't display the vcall offset unless in verbose mode. */ - if (flag_verbose) - { - status = result_add_string (dm, number); - if (STATUS_NO_ERROR (status)) - status = result_add_char (dm, ']'); - } - dyn_string_delete (number); - RETURN_IF_ERROR (status); - - return STATUS_OK; -} - -/* Demangles and emits a . 
- - ::= h _ - ::= v _ */ - -static status_t -demangle_call_offset (dm) - demangling_t dm; -{ - DEMANGLE_TRACE ("call-offset", dm); - - switch (peek_char (dm)) - { - case 'h': - advance_char (dm); - /* Demangle the offset. */ - RETURN_IF_ERROR (demangle_nv_offset (dm)); - /* Demangle the separator. */ - RETURN_IF_ERROR (demangle_char (dm, '_')); - break; - - case 'v': - advance_char (dm); - /* Demangle the offset. */ - RETURN_IF_ERROR (demangle_v_offset (dm)); - /* Demangle the separator. */ - RETURN_IF_ERROR (demangle_char (dm, '_')); - break; - - default: - return "Unrecognized ."; - } - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= GV # Guard variable - ::= TV # virtual table - ::= TT # VTT - ::= TI # typeinfo structure - ::= TS # typeinfo name - - Other relevant productions include thunks: - - ::= T - # base is the nominal target function of thunk - - ::= Tc - # base is the nominal target function of thunk - # first call-offset is 'this' adjustment - # second call-offset is result adjustment - - where - - ::= h _ - ::= v _ - - Also demangles the special g++ manglings, - - ::= TC _ - # construction vtable - ::= TF # typeinfo function (old ABI only) - ::= TJ # java Class structure */ - -static status_t -demangle_special_name (dm) - demangling_t dm; -{ - dyn_string_t number; - int unused; - char peek = peek_char (dm); - - DEMANGLE_TRACE ("special-name", dm); - - if (peek == 'G') - { - /* Consume the G. */ - advance_char (dm); - switch (peek_char (dm)) - { - case 'V': - /* A guard variable name. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "guard variable for ")); - RETURN_IF_ERROR (demangle_name (dm, &unused)); - break; - - case 'R': - /* A reference temporary. 
*/ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "reference temporary for ")); - RETURN_IF_ERROR (demangle_name (dm, &unused)); - break; - - default: - return "Unrecognized ."; - } - } - else if (peek == 'T') - { - status_t status = STATUS_OK; - - /* Other C++ implementation miscellania. Consume the T. */ - advance_char (dm); - - switch (peek_char (dm)) - { - case 'V': - /* Virtual table. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "vtable for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'T': - /* VTT structure. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "VTT for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'I': - /* Typeinfo structure. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "typeinfo for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'F': - /* Typeinfo function. Used only in old ABI with new mangling. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "typeinfo fn for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'S': - /* Character string containing type name, used in typeinfo. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "typeinfo name for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'J': - /* The java Class variable corresponding to a C++ class. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "java Class for ")); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'h': - /* Non-virtual thunk. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "non-virtual thunk")); - RETURN_IF_ERROR (demangle_nv_offset (dm)); - /* Demangle the separator. */ - RETURN_IF_ERROR (demangle_char (dm, '_')); - /* Demangle and emit the target name and function type. */ - RETURN_IF_ERROR (result_add (dm, " to ")); - RETURN_IF_ERROR (demangle_encoding (dm)); - break; - - case 'v': - /* Virtual thunk. 
*/ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "virtual thunk")); - RETURN_IF_ERROR (demangle_v_offset (dm)); - /* Demangle the separator. */ - RETURN_IF_ERROR (demangle_char (dm, '_')); - /* Demangle and emit the target function. */ - RETURN_IF_ERROR (result_add (dm, " to ")); - RETURN_IF_ERROR (demangle_encoding (dm)); - break; - - case 'c': - /* Covariant return thunk. */ - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "covariant return thunk")); - RETURN_IF_ERROR (demangle_call_offset (dm)); - RETURN_IF_ERROR (demangle_call_offset (dm)); - /* Demangle and emit the target function. */ - RETURN_IF_ERROR (result_add (dm, " to ")); - RETURN_IF_ERROR (demangle_encoding (dm)); - break; - - case 'C': - /* TC is a special g++ mangling for a construction vtable. */ - if (!flag_strict) - { - dyn_string_t derived_type; - - advance_char (dm); - RETURN_IF_ERROR (result_add (dm, "construction vtable for ")); - - /* Demangle the derived type off to the side. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_type (dm)); - derived_type = (dyn_string_t) result_pop (dm); - - /* Demangle the offset. */ - number = dyn_string_new (4); - if (number == NULL) - { - dyn_string_delete (derived_type); - return STATUS_ALLOCATION_FAILED; - } - demangle_number_literally (dm, number, 10, 1); - /* Demangle the underscore separator. */ - status = demangle_char (dm, '_'); - - /* Demangle the base type. */ - if (STATUS_NO_ERROR (status)) - status = demangle_type (dm); - - /* Emit the derived type. */ - if (STATUS_NO_ERROR (status)) - status = result_add (dm, "-in-"); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, derived_type); - dyn_string_delete (derived_type); - - /* Don't display the offset unless in verbose mode. 
*/ - if (flag_verbose) - { - status = result_add_char (dm, ' '); - if (STATUS_NO_ERROR (status)) - result_add_string (dm, number); - } - dyn_string_delete (number); - RETURN_IF_ERROR (status); - break; - } - /* If flag_strict, fall through. */ - - default: - return "Unrecognized ."; - } - } - else - return STATUS_ERROR; - - return STATUS_OK; -} - -/* Demangles and emits a . - - - ::= C1 # complete object (in-charge) ctor - ::= C2 # base object (not-in-charge) ctor - ::= C3 # complete object (in-charge) allocating ctor - ::= D0 # deleting (in-charge) dtor - ::= D1 # complete object (in-charge) dtor - ::= D2 # base object (not-in-charge) dtor */ - -static status_t -demangle_ctor_dtor_name (dm) - demangling_t dm; -{ - static const char *const ctor_flavors[] = - { - "in-charge", - "not-in-charge", - "allocating" - }; - static const char *const dtor_flavors[] = - { - "in-charge deleting", - "in-charge", - "not-in-charge" - }; - - int flavor; - char peek = peek_char (dm); - - DEMANGLE_TRACE ("ctor-dtor-name", dm); - - if (peek == 'C') - { - /* A constructor name. Consume the C. */ - advance_char (dm); - flavor = next_char (dm); - if (flavor < '1' || flavor > '3') - return "Unrecognized constructor."; - RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); - switch (flavor) - { - case '1': dm->is_constructor = gnu_v3_complete_object_ctor; - break; - case '2': dm->is_constructor = gnu_v3_base_object_ctor; - break; - case '3': dm->is_constructor = gnu_v3_complete_object_allocating_ctor; - break; - } - /* Print the flavor of the constructor if in verbose mode. */ - if (flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, "[")); - RETURN_IF_ERROR (result_add (dm, ctor_flavors[flavor - '1'])); - RETURN_IF_ERROR (result_add_char (dm, ']')); - } - } - else if (peek == 'D') - { - /* A destructor name. Consume the D. 
*/ - advance_char (dm); - flavor = next_char (dm); - if (flavor < '0' || flavor > '2') - return "Unrecognized destructor."; - RETURN_IF_ERROR (result_add_char (dm, '~')); - RETURN_IF_ERROR (result_add_string (dm, dm->last_source_name)); - switch (flavor) - { - case '0': dm->is_destructor = gnu_v3_deleting_dtor; - break; - case '1': dm->is_destructor = gnu_v3_complete_object_dtor; - break; - case '2': dm->is_destructor = gnu_v3_base_object_dtor; - break; - } - /* Print the flavor of the destructor if in verbose mode. */ - if (flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, " [")); - RETURN_IF_ERROR (result_add (dm, dtor_flavors[flavor - '0'])); - RETURN_IF_ERROR (result_add_char (dm, ']')); - } - } - else - return STATUS_ERROR; - - return STATUS_OK; -} - -/* Handle pointer, reference, and pointer-to-member cases for - demangle_type. All consecutive `P's, `R's, and 'M's are joined to - build a pointer/reference type. We snarf all these, plus the - following , all at once since we need to know whether we have - a pointer to data or pointer to function to construct the right - output syntax. C++'s pointer syntax is hairy. - - This function adds substitution candidates for every nested - pointer/reference type it processes, including the outermost, final - type, assuming the substitution starts at SUBSTITUTION_START in the - demangling result. For example, if this function demangles - `PP3Foo', it will add a substitution for `Foo', `Foo*', and - `Foo**', in that order. - - *INSERT_POS is a quantity used internally, when this function calls - itself recursively, to figure out where to insert pointer - punctuation on the way up. On entry to this function, INSERT_POS - should point to a temporary value, but that value need not be - initialized. 
- - ::= P - ::= R - ::= - - ::= M */ - -static status_t -demangle_type_ptr (dm, insert_pos, substitution_start) - demangling_t dm; - int *insert_pos; - int substitution_start; -{ - status_t status; - int is_substitution_candidate = 1; - - DEMANGLE_TRACE ("type*", dm); - - /* Scan forward, collecting pointers and references into symbols, - until we hit something else. Then emit the type. */ - switch (peek_char (dm)) - { - case 'P': - /* A pointer. Snarf the `P'. */ - advance_char (dm); - /* Demangle the underlying type. */ - RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, - substitution_start)); - /* Insert an asterisk where we're told to; it doesn't - necessarily go at the end. If we're doing Java style output, - there is no pointer symbol. */ - if (dm->style != DMGL_JAVA) - RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '*')); - /* The next (outermost) pointer or reference character should go - after this one. */ - ++(*insert_pos); - break; - - case 'R': - /* A reference. Snarf the `R'. */ - advance_char (dm); - /* Demangle the underlying type. */ - RETURN_IF_ERROR (demangle_type_ptr (dm, insert_pos, - substitution_start)); - /* Insert an ampersand where we're told to; it doesn't - necessarily go at the end. */ - RETURN_IF_ERROR (result_insert_char (dm, *insert_pos, '&')); - /* The next (outermost) pointer or reference character should go - after this one. */ - ++(*insert_pos); - break; - - case 'M': - { - /* A pointer-to-member. */ - dyn_string_t class_type; - - /* Eat the 'M'. */ - advance_char (dm); - - /* Capture the type of which this is a pointer-to-member. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_type (dm)); - class_type = (dyn_string_t) result_pop (dm); - - if (peek_char (dm) == 'F') - /* A pointer-to-member function. We want output along the - lines of `void (C::*) (int, int)'. 
Demangle the function - type, which would in this case give `void () (int, int)' - and set *insert_pos to the spot between the first - parentheses. */ - status = demangle_type_ptr (dm, insert_pos, substitution_start); - else if (peek_char (dm) == 'A') - /* A pointer-to-member array variable. We want output that - looks like `int (Klass::*) [10]'. Demangle the array type - as `int () [10]', and set *insert_pos to the spot between - the parentheses. */ - status = demangle_array_type (dm, insert_pos); - else - { - /* A pointer-to-member variable. Demangle the type of the - pointed-to member. */ - status = demangle_type (dm); - /* Make it pretty. */ - if (STATUS_NO_ERROR (status) - && !result_previous_char_is_space (dm)) - status = result_add_char (dm, ' '); - /* The pointer-to-member notation (e.g. `C::*') follows the - member's type. */ - *insert_pos = result_caret_pos (dm); - } - - /* Build the pointer-to-member notation. */ - if (STATUS_NO_ERROR (status)) - status = result_insert (dm, *insert_pos, "::*"); - if (STATUS_NO_ERROR (status)) - status = result_insert_string (dm, *insert_pos, class_type); - /* There may be additional levels of (pointer or reference) - indirection in this type. If so, the `*' and `&' should be - added after the pointer-to-member notation (e.g. `C::*&' for - a reference to a pointer-to-member of class C). */ - *insert_pos += dyn_string_length (class_type) + 3; - - /* Clean up. */ - dyn_string_delete (class_type); - - RETURN_IF_ERROR (status); - } - break; - - case 'F': - /* Ooh, tricky, a pointer-to-function. When we demangle the - function type, the return type should go at the very - beginning. */ - *insert_pos = result_caret_pos (dm); - /* The parentheses indicate this is a function pointer or - reference type. */ - RETURN_IF_ERROR (result_add (dm, "()")); - /* Now demangle the function type. The return type will be - inserted before the `()', and the argument list will go after - it. 
*/ - RETURN_IF_ERROR (demangle_function_type (dm, insert_pos)); - /* We should now have something along the lines of - `void () (int, int)'. The pointer or reference characters - have to inside the first set of parentheses. *insert_pos has - already been updated to point past the end of the return - type. Move it one character over so it points inside the - `()'. */ - ++(*insert_pos); - break; - - case 'A': - /* An array pointer or reference. demangle_array_type will figure - out where the asterisks and ampersands go. */ - RETURN_IF_ERROR (demangle_array_type (dm, insert_pos)); - break; - - default: - /* No more pointer or reference tokens; this is therefore a - pointer to data. Finish up by demangling the underlying - type. */ - RETURN_IF_ERROR (demangle_type (dm)); - /* The pointer or reference characters follow the underlying - type, as in `int*&'. */ - *insert_pos = result_caret_pos (dm); - /* Because of the production ::= , - demangle_type will already have added the underlying type as - a substitution candidate. Don't do it again. */ - is_substitution_candidate = 0; - break; - } - - if (is_substitution_candidate) - RETURN_IF_ERROR (substitution_add (dm, substitution_start, 0)); - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= - ::= - ::= - ::= - ::= - ::= - ::= - ::= - ::= P # pointer-to - ::= R # reference-to - ::= C # complex pair (C 2000) - ::= G # imaginary (C 2000) - ::= U # vendor extended type qualifier - ::= */ - -static status_t -demangle_type (dm) - demangling_t dm; -{ - int start = substitution_start (dm); - char peek = peek_char (dm); - char peek_next; - int encode_return_type = 0; - template_arg_list_t old_arg_list = current_template_arg_list (dm); - int insert_pos; - - /* A can be a ; therefore, this is a - substitution candidate unless a special condition holds (see - below). */ - int is_substitution_candidate = 1; - - DEMANGLE_TRACE ("type", dm); - - /* A can start with a digit (a ), an - N (a ), or a Z (a ). 
*/ - if (IS_DIGIT ((unsigned char) peek) || peek == 'N' || peek == 'Z') - RETURN_IF_ERROR (demangle_class_enum_type (dm, &encode_return_type)); - /* Lower-case letters begin s, except for `r', which - denotes restrict. */ - else if (peek >= 'a' && peek <= 'z' && peek != 'r') - { - RETURN_IF_ERROR (demangle_builtin_type (dm)); - /* Built-in types are not substitution candidates. */ - is_substitution_candidate = 0; - } - else - switch (peek) - { - case 'r': - case 'V': - case 'K': - /* CV-qualifiers (including restrict). We have to demangle - them off to the side, since C++ syntax puts them in a funny - place for qualified pointer and reference types. */ - { - status_t status; - dyn_string_t cv_qualifiers = dyn_string_new (24); - int old_caret_position = result_get_caret (dm); - - if (cv_qualifiers == NULL) - return STATUS_ALLOCATION_FAILED; - - /* Decode all adjacent CV qualifiers. */ - demangle_CV_qualifiers (dm, cv_qualifiers); - /* Emit them, and shift the caret left so that the - underlying type will be emitted before the qualifiers. */ - status = result_add_string (dm, cv_qualifiers); - result_shift_caret (dm, -dyn_string_length (cv_qualifiers)); - /* Clean up. */ - dyn_string_delete (cv_qualifiers); - RETURN_IF_ERROR (status); - /* Also prepend a blank, if needed. */ - RETURN_IF_ERROR (result_add_char (dm, ' ')); - result_shift_caret (dm, -1); - - /* Demangle the underlying type. It will be emitted before - the CV qualifiers, since we moved the caret. */ - RETURN_IF_ERROR (demangle_type (dm)); - - /* Put the caret back where it was previously. */ - result_set_caret (dm, old_caret_position); - } - break; - - case 'F': - return "Non-pointer or -reference function type."; - - case 'A': - RETURN_IF_ERROR (demangle_array_type (dm, NULL)); - break; - - case 'T': - /* It's either a or a - . In either case, demangle the - `T' token first. 
*/ - RETURN_IF_ERROR (demangle_template_param (dm)); - - /* Check for a template argument list; if one is found, it's a - ::= - ::= */ - if (peek_char (dm) == 'I') - { - /* Add a substitution candidate. The template parameter - `T' token is a substitution candidate by itself, - without the template argument list. */ - RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); - - /* Now demangle the template argument list. */ - RETURN_IF_ERROR (demangle_template_args (dm)); - /* The entire type, including the template template - parameter and its argument list, will be added as a - substitution candidate below. */ - } - - break; - - case 'S': - /* First check if this is a special substitution. If it is, - this is a . Special substitutions have a - letter following the `S'; other substitutions have a digit - or underscore. */ - peek_next = peek_char_next (dm); - if (IS_DIGIT (peek_next) || peek_next == '_') - { - RETURN_IF_ERROR (demangle_substitution (dm, &encode_return_type)); - - /* The substituted name may have been a template name. - Check if template arguments follow, and if so, demangle - them. */ - if (peek_char (dm) == 'I') - RETURN_IF_ERROR (demangle_template_args (dm)); - else - /* A substitution token is not itself a substitution - candidate. (However, if the substituted template is - instantiated, the resulting type is.) */ - is_substitution_candidate = 0; - } - else - { - /* Now some trickiness. We have a special substitution - here. Often, the special substitution provides the - name of a template that's subsequently instantiated, - for instance `SaIcE' => std::allocator. In these - cases we need to add a substitution candidate for the - entire and thus don't want to clear - the is_substitution_candidate flag. - - However, it's possible that what we have here is a - substitution token representing an entire type, such as - `Ss' => std::string. In this case, we mustn't add a - new substitution candidate for this substitution token. 
- To detect this case, remember where the start of the - substitution token is. */ - const char *next = dm->next; - /* Now demangle the . */ - RETURN_IF_ERROR - (demangle_class_enum_type (dm, &encode_return_type)); - /* If all that was just demangled is the two-character - special substitution token, supress the addition of a - new candidate for it. */ - if (dm->next == next + 2) - is_substitution_candidate = 0; - } - - break; - - case 'P': - case 'R': - case 'M': - RETURN_IF_ERROR (demangle_type_ptr (dm, &insert_pos, start)); - /* demangle_type_ptr adds all applicable substitution - candidates. */ - is_substitution_candidate = 0; - break; - - case 'C': - /* A C99 complex type. */ - RETURN_IF_ERROR (result_add (dm, "complex ")); - advance_char (dm); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'G': - /* A C99 imaginary type. */ - RETURN_IF_ERROR (result_add (dm, "imaginary ")); - advance_char (dm); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - case 'U': - /* Vendor-extended type qualifier. */ - advance_char (dm); - RETURN_IF_ERROR (demangle_source_name (dm)); - RETURN_IF_ERROR (result_add_char (dm, ' ')); - RETURN_IF_ERROR (demangle_type (dm)); - break; - - default: - return "Unexpected character in ."; - } - - if (is_substitution_candidate) - /* Add a new substitution for the type. If this type was a - , pass its index since from the point of - substitutions; a token is a substitution - candidate distinct from the type that is substituted for it. */ - RETURN_IF_ERROR (substitution_add (dm, start, encode_return_type)); - - /* Pop off template argument lists added during mangling of this - type. */ - pop_to_template_arg_list (dm, old_arg_list); - - return STATUS_OK; -} - -/* C++ source names of builtin types, indexed by the mangled code - letter's position in the alphabet ('a' -> 0, 'b' -> 1, etc). 
*/ -static const char *const builtin_type_names[26] = -{ - "signed char", /* a */ - "bool", /* b */ - "char", /* c */ - "double", /* d */ - "long double", /* e */ - "float", /* f */ - "__float128", /* g */ - "unsigned char", /* h */ - "int", /* i */ - "unsigned", /* j */ - NULL, /* k */ - "long", /* l */ - "unsigned long", /* m */ - "__int128", /* n */ - "unsigned __int128", /* o */ - NULL, /* p */ - NULL, /* q */ - NULL, /* r */ - "short", /* s */ - "unsigned short", /* t */ - NULL, /* u */ - "void", /* v */ - "wchar_t", /* w */ - "long long", /* x */ - "unsigned long long", /* y */ - "..." /* z */ -}; - -/* Java source names of builtin types. Types that arn't valid in Java - are also included here - we don't fail if someone attempts to demangle a - C++ symbol in Java style. */ -static const char *const java_builtin_type_names[26] = -{ - "signed char", /* a */ - "boolean", /* C++ "bool" */ /* b */ - "byte", /* C++ "char" */ /* c */ - "double", /* d */ - "long double", /* e */ - "float", /* f */ - "__float128", /* g */ - "unsigned char", /* h */ - "int", /* i */ - "unsigned", /* j */ - NULL, /* k */ - "long", /* l */ - "unsigned long", /* m */ - "__int128", /* n */ - "unsigned __int128", /* o */ - NULL, /* p */ - NULL, /* q */ - NULL, /* r */ - "short", /* s */ - "unsigned short", /* t */ - NULL, /* u */ - "void", /* v */ - "char", /* C++ "wchar_t" */ /* w */ - "long", /* C++ "long long" */ /* x */ - "unsigned long long", /* y */ - "..." /* z */ -}; - -/* Demangles and emits a . 
- - ::= v # void - ::= w # wchar_t - ::= b # bool - ::= c # char - ::= a # signed char - ::= h # unsigned char - ::= s # short - ::= t # unsigned short - ::= i # int - ::= j # unsigned int - ::= l # long - ::= m # unsigned long - ::= x # long long, __int64 - ::= y # unsigned long long, __int64 - ::= n # __int128 - ::= o # unsigned __int128 - ::= f # float - ::= d # double - ::= e # long double, __float80 - ::= g # __float128 - ::= z # ellipsis - ::= u # vendor extended type */ - -static status_t -demangle_builtin_type (dm) - demangling_t dm; -{ - - char code = peek_char (dm); - - DEMANGLE_TRACE ("builtin-type", dm); - - if (code == 'u') - { - advance_char (dm); - RETURN_IF_ERROR (demangle_source_name (dm)); - return STATUS_OK; - } - else if (code >= 'a' && code <= 'z') - { - const char *type_name; - /* Java uses different names for some built-in types. */ - if (dm->style == DMGL_JAVA) - type_name = java_builtin_type_names[code - 'a']; - else - type_name = builtin_type_names[code - 'a']; - if (type_name == NULL) - return "Unrecognized code."; - - RETURN_IF_ERROR (result_add (dm, type_name)); - advance_char (dm); - return STATUS_OK; - } - else - return "Non-alphabetic code."; -} - -/* Demangles all consecutive CV-qualifiers (const, volatile, and - restrict) at the current position. The qualifiers are appended to - QUALIFIERS. Returns STATUS_OK. 
*/ - -static status_t -demangle_CV_qualifiers (dm, qualifiers) - demangling_t dm; - dyn_string_t qualifiers; -{ - DEMANGLE_TRACE ("CV-qualifiers", dm); - - while (1) - { - switch (peek_char (dm)) - { - case 'r': - if (!dyn_string_append_space (qualifiers)) - return STATUS_ALLOCATION_FAILED; - if (!dyn_string_append_cstr (qualifiers, "restrict")) - return STATUS_ALLOCATION_FAILED; - break; - - case 'V': - if (!dyn_string_append_space (qualifiers)) - return STATUS_ALLOCATION_FAILED; - if (!dyn_string_append_cstr (qualifiers, "volatile")) - return STATUS_ALLOCATION_FAILED; - break; - - case 'K': - if (!dyn_string_append_space (qualifiers)) - return STATUS_ALLOCATION_FAILED; - if (!dyn_string_append_cstr (qualifiers, "const")) - return STATUS_ALLOCATION_FAILED; - break; - - default: - return STATUS_OK; - } - - advance_char (dm); - } -} - -/* Demangles and emits a . *FUNCTION_NAME_POS is the - position in the result string of the start of the function - identifier, at which the function's return type will be inserted; - *FUNCTION_NAME_POS is updated to position past the end of the - function's return type. - - ::= F [Y] E */ - -static status_t -demangle_function_type (dm, function_name_pos) - demangling_t dm; - int *function_name_pos; -{ - DEMANGLE_TRACE ("function-type", dm); - RETURN_IF_ERROR (demangle_char (dm, 'F')); - if (peek_char (dm) == 'Y') - { - /* Indicate this function has C linkage if in verbose mode. */ - if (flag_verbose) - RETURN_IF_ERROR (result_add (dm, " [extern \"C\"] ")); - advance_char (dm); - } - RETURN_IF_ERROR (demangle_bare_function_type (dm, function_name_pos)); - RETURN_IF_ERROR (demangle_char (dm, 'E')); - return STATUS_OK; -} - -/* Demangles and emits a . RETURN_TYPE_POS is the - position in the result string at which the function return type - should be inserted. If RETURN_TYPE_POS is BFT_NO_RETURN_TYPE, the - function's return type is assumed not to be encoded. 
- - ::= + */ - -static status_t -demangle_bare_function_type (dm, return_type_pos) - demangling_t dm; - int *return_type_pos; -{ - /* Sequence is the index of the current function parameter, counting - from zero. The value -1 denotes the return type. */ - int sequence = - (return_type_pos == BFT_NO_RETURN_TYPE ? 0 : -1); - - DEMANGLE_TRACE ("bare-function-type", dm); - - RETURN_IF_ERROR (result_add_char (dm, '(')); - while (!end_of_name_p (dm) && peek_char (dm) != 'E') - { - if (sequence == -1) - /* We're decoding the function's return type. */ - { - dyn_string_t return_type; - status_t status = STATUS_OK; - - /* Decode the return type off to the side. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_type (dm)); - return_type = (dyn_string_t) result_pop (dm); - - /* Add a space to the end of the type. Insert the return - type where we've been asked to. */ - if (!dyn_string_append_space (return_type)) - status = STATUS_ALLOCATION_FAILED; - if (STATUS_NO_ERROR (status)) - { - if (!dyn_string_insert (result_string (dm), *return_type_pos, - return_type)) - status = STATUS_ALLOCATION_FAILED; - else - *return_type_pos += dyn_string_length (return_type); - } - - dyn_string_delete (return_type); - RETURN_IF_ERROR (status); - } - else - { - /* Skip `void' parameter types. One should only occur as - the only type in a parameter list; in that case, we want - to print `foo ()' instead of `foo (void)'. */ - if (peek_char (dm) == 'v') - /* Consume the v. */ - advance_char (dm); - else - { - /* Separate parameter types by commas. */ - if (sequence > 0) - RETURN_IF_ERROR (result_add (dm, ", ")); - /* Demangle the type. */ - RETURN_IF_ERROR (demangle_type (dm)); - } - } - - ++sequence; - } - RETURN_IF_ERROR (result_add_char (dm, ')')); - - /* We should have demangled at least one parameter type (which would - be void, for a function that takes no parameters), plus the - return type, if we were supposed to demangle that. 
*/ - if (sequence == -1) - return "Missing function return type."; - else if (sequence == 0) - return "Missing function parameter."; - - return STATUS_OK; -} - -/* Demangles and emits a . *ENCODE_RETURN_TYPE is set to - non-zero if the type is a template-id, zero otherwise. - - ::= */ - -static status_t -demangle_class_enum_type (dm, encode_return_type) - demangling_t dm; - int *encode_return_type; -{ - DEMANGLE_TRACE ("class-enum-type", dm); - - RETURN_IF_ERROR (demangle_name (dm, encode_return_type)); - return STATUS_OK; -} - -/* Demangles and emits an . - - If PTR_INSERT_POS is not NULL, the array type is formatted as a - pointer or reference to an array, except that asterisk and - ampersand punctuation is omitted (since it's not know at this - point). *PTR_INSERT_POS is set to the position in the demangled - name at which this punctuation should be inserted. For example, - `A10_i' is demangled to `int () [10]' and *PTR_INSERT_POS points - between the parentheses. - - If PTR_INSERT_POS is NULL, the array type is assumed not to be - pointer- or reference-qualified. Then, for example, `A10_i' is - demangled simply as `int[10]'. - - ::= A [] _ - ::= A _ */ - -static status_t -demangle_array_type (dm, ptr_insert_pos) - demangling_t dm; - int *ptr_insert_pos; -{ - status_t status = STATUS_OK; - dyn_string_t array_size = NULL; - char peek; - - DEMANGLE_TRACE ("array-type", dm); - - RETURN_IF_ERROR (demangle_char (dm, 'A')); - - /* Demangle the array size into array_size. */ - peek = peek_char (dm); - if (peek == '_') - /* Array bound is omitted. This is a C99-style VLA. */ - ; - else if (IS_DIGIT (peek_char (dm))) - { - /* It looks like a constant array bound. */ - array_size = dyn_string_new (10); - if (array_size == NULL) - return STATUS_ALLOCATION_FAILED; - status = demangle_number_literally (dm, array_size, 10, 0); - } - else - { - /* Anything is must be an expression for a nont-constant array - bound. 
This happens if the array type occurs in a template - and the array bound references a template parameter. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_expression_v3 (dm)); - array_size = (dyn_string_t) result_pop (dm); - } - /* array_size may have been allocated by now, so we can't use - RETURN_IF_ERROR until it's been deallocated. */ - - /* Demangle the base type of the array. */ - if (STATUS_NO_ERROR (status)) - status = demangle_char (dm, '_'); - if (STATUS_NO_ERROR (status)) - status = demangle_type (dm); - - if (ptr_insert_pos != NULL) - { - /* This array is actually part of an pointer- or - reference-to-array type. Format appropriately, except we - don't know which and how much punctuation to use. */ - if (STATUS_NO_ERROR (status)) - status = result_add (dm, " () "); - /* Let the caller know where to insert the punctuation. */ - *ptr_insert_pos = result_caret_pos (dm) - 2; - } - - /* Emit the array dimension syntax. */ - if (STATUS_NO_ERROR (status)) - status = result_add_char (dm, '['); - if (STATUS_NO_ERROR (status) && array_size != NULL) - status = result_add_string (dm, array_size); - if (STATUS_NO_ERROR (status)) - status = result_add_char (dm, ']'); - if (array_size != NULL) - dyn_string_delete (array_size); - - RETURN_IF_ERROR (status); - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= T_ # first template parameter - ::= T _ */ - -static status_t -demangle_template_param (dm) - demangling_t dm; -{ - int parm_number; - template_arg_list_t current_arg_list = current_template_arg_list (dm); - string_list_t arg; - - DEMANGLE_TRACE ("template-param", dm); - - /* Make sure there is a template argmust list in which to look up - this parameter reference. 
*/ - if (current_arg_list == NULL) - return "Template parameter outside of template."; - - RETURN_IF_ERROR (demangle_char (dm, 'T')); - if (peek_char (dm) == '_') - parm_number = 0; - else - { - RETURN_IF_ERROR (demangle_number (dm, &parm_number, 10, 0)); - ++parm_number; - } - RETURN_IF_ERROR (demangle_char (dm, '_')); - - arg = template_arg_list_get_arg (current_arg_list, parm_number); - if (arg == NULL) - /* parm_number exceeded the number of arguments in the current - template argument list. */ - return "Template parameter number out of bounds."; - RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= I + E */ - -static status_t -demangle_template_args_1 (dm, arg_list) - demangling_t dm; - template_arg_list_t arg_list; -{ - int first = 1; - - DEMANGLE_TRACE ("template-args", dm); - - RETURN_IF_ERROR (demangle_char (dm, 'I')); - RETURN_IF_ERROR (result_open_template_list (dm)); - do - { - string_list_t arg; - - if (first) - first = 0; - else - RETURN_IF_ERROR (result_add (dm, ", ")); - - /* Capture the template arg. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_template_arg (dm)); - arg = result_pop (dm); - - /* Emit it in the demangled name. */ - RETURN_IF_ERROR (result_add_string (dm, (dyn_string_t) arg)); - - /* Save it for use in expanding s. */ - template_arg_list_add_arg (arg_list, arg); - } - while (peek_char (dm) != 'E'); - /* Append the '>'. */ - RETURN_IF_ERROR (result_close_template_list (dm)); - - /* Consume the 'E'. */ - advance_char (dm); - - return STATUS_OK; -} - -static status_t -demangle_template_args (dm) - demangling_t dm; -{ - int first = 1; - dyn_string_t old_last_source_name; - dyn_string_t new_name; - template_arg_list_t arg_list = template_arg_list_new (); - status_t status; - - if (arg_list == NULL) - return STATUS_ALLOCATION_FAILED; - - /* Preserve the most recently demangled source name. 
*/ - old_last_source_name = dm->last_source_name; - new_name = dyn_string_new (0); - - if (new_name == NULL) - { - template_arg_list_delete (arg_list); - return STATUS_ALLOCATION_FAILED; - } - - dm->last_source_name = new_name; - - status = demangle_template_args_1 (dm, arg_list); - /* Restore the most recent demangled source name. */ - dyn_string_delete (dm->last_source_name); - dm->last_source_name = old_last_source_name; - - if (!STATUS_NO_ERROR (status)) - { - template_arg_list_delete (arg_list); - return status; - } - - /* Push the list onto the top of the stack of template argument - lists, so that arguments from it are used from now on when - expanding s. */ - push_template_arg_list (dm, arg_list); - - return STATUS_OK; -} - -/* This function, which does not correspond to a production in the - mangling spec, handles the `literal' production for both - and . It does not expect or consume - the initial `L' or final `E'. The demangling is given by: - - ::= - - and the emitted output is `(type)number'. */ - -static status_t -demangle_literal (dm) - demangling_t dm; -{ - char peek = peek_char (dm); - dyn_string_t value_string; - status_t status; - - DEMANGLE_TRACE ("literal", dm); - - if (!flag_verbose && peek >= 'a' && peek <= 'z') - { - /* If not in verbose mode and this is a builtin type, see if we - can produce simpler numerical output. In particular, for - integer types shorter than `long', just write the number - without type information; for bools, write `true' or `false'. - Other refinements could be made here too. */ - - /* This constant string is used to map from codes - (26 letters of the alphabet) to codes that determine how the - value will be displayed. The codes are: - b: display as bool - i: display as int - l: display as long - A space means the value will be represented using cast - notation. */ - static const char *const code_map = "ibi iii ll ii i "; - - char code = code_map[peek - 'a']; - /* FIXME: Implement demangling of floats and doubles. 
*/ - if (code == 'u') - return STATUS_UNIMPLEMENTED; - if (code == 'b') - { - /* It's a boolean. */ - char value; - - /* Consume the b. */ - advance_char (dm); - /* Look at the next character. It should be 0 or 1, - corresponding to false or true, respectively. */ - value = peek_char (dm); - if (value == '0') - RETURN_IF_ERROR (result_add (dm, "false")); - else if (value == '1') - RETURN_IF_ERROR (result_add (dm, "true")); - else - return "Unrecognized bool constant."; - /* Consume the 0 or 1. */ - advance_char (dm); - return STATUS_OK; - } - else if (code == 'i' || code == 'l') - { - /* It's an integer or long. */ - - /* Consume the type character. */ - advance_char (dm); - - /* Demangle the number and write it out. */ - value_string = dyn_string_new (0); - status = demangle_number_literally (dm, value_string, 10, 1); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, value_string); - /* For long integers, append an l. */ - if (code == 'l' && STATUS_NO_ERROR (status)) - status = result_add_char (dm, code); - dyn_string_delete (value_string); - - RETURN_IF_ERROR (status); - return STATUS_OK; - } - /* ...else code == ' ', so fall through to represent this - literal's type explicitly using cast syntax. */ - } - - RETURN_IF_ERROR (result_add_char (dm, '(')); - RETURN_IF_ERROR (demangle_type (dm)); - RETURN_IF_ERROR (result_add_char (dm, ')')); - - value_string = dyn_string_new (0); - if (value_string == NULL) - return STATUS_ALLOCATION_FAILED; - - status = demangle_number_literally (dm, value_string, 10, 1); - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, value_string); - dyn_string_delete (value_string); - RETURN_IF_ERROR (status); - - return STATUS_OK; -} - -/* Demangles and emits a . 
- - ::= # type - ::= L E # literal - ::= LZ E # external name - ::= X E # expression */ - -static status_t -demangle_template_arg (dm) - demangling_t dm; -{ - DEMANGLE_TRACE ("template-arg", dm); - - switch (peek_char (dm)) - { - case 'L': - advance_char (dm); - - if (peek_char (dm) == 'Z') - { - /* External name. */ - advance_char (dm); - /* FIXME: Standard is contradictory here. */ - RETURN_IF_ERROR (demangle_encoding (dm)); - } - else - RETURN_IF_ERROR (demangle_literal (dm)); - RETURN_IF_ERROR (demangle_char (dm, 'E')); - break; - - case 'X': - /* Expression. */ - advance_char (dm); - RETURN_IF_ERROR (demangle_expression_v3 (dm)); - RETURN_IF_ERROR (demangle_char (dm, 'E')); - break; - - default: - RETURN_IF_ERROR (demangle_type (dm)); - break; - } - - return STATUS_OK; -} - -/* Demangles and emits an . - - ::= - ::= - ::= - ::= */ - -static status_t -demangle_expression_v3 (dm) - demangling_t dm; -{ - char peek = peek_char (dm); - - DEMANGLE_TRACE ("expression", dm); - - if (peek == 'L' || peek == 'T') - RETURN_IF_ERROR (demangle_expr_primary (dm)); - else if (peek == 's' && peek_char_next (dm) == 'r') - RETURN_IF_ERROR (demangle_scope_expression (dm)); - else - /* An operator expression. */ - { - int num_args; - status_t status = STATUS_OK; - dyn_string_t operator_name; - - /* We have an operator name. Since we want to output binary - operations in infix notation, capture the operator name - first. */ - RETURN_IF_ERROR (result_push (dm)); - RETURN_IF_ERROR (demangle_operator_name (dm, 1, &num_args)); - operator_name = (dyn_string_t) result_pop (dm); - - /* If it's binary, do an operand first. */ - if (num_args > 1) - { - status = result_add_char (dm, '('); - if (STATUS_NO_ERROR (status)) - status = demangle_expression_v3 (dm); - if (STATUS_NO_ERROR (status)) - status = result_add_char (dm, ')'); - } - - /* Emit the operator. 
*/ - if (STATUS_NO_ERROR (status)) - status = result_add_string (dm, operator_name); - dyn_string_delete (operator_name); - RETURN_IF_ERROR (status); - - /* Emit its second (if binary) or only (if unary) operand. */ - RETURN_IF_ERROR (result_add_char (dm, '(')); - RETURN_IF_ERROR (demangle_expression_v3 (dm)); - RETURN_IF_ERROR (result_add_char (dm, ')')); - - /* The ternary operator takes a third operand. */ - if (num_args == 3) - { - RETURN_IF_ERROR (result_add (dm, ":(")); - RETURN_IF_ERROR (demangle_expression_v3 (dm)); - RETURN_IF_ERROR (result_add_char (dm, ')')); - } - } - - return STATUS_OK; -} - -/* Demangles and emits a . - - ::= sr - ::= sr */ - -static status_t -demangle_scope_expression (dm) - demangling_t dm; -{ - RETURN_IF_ERROR (demangle_char (dm, 's')); - RETURN_IF_ERROR (demangle_char (dm, 'r')); - RETURN_IF_ERROR (demangle_type (dm)); - RETURN_IF_ERROR (result_add (dm, "::")); - RETURN_IF_ERROR (demangle_encoding (dm)); - return STATUS_OK; -} - -/* Demangles and emits an . - - ::= - ::= L E # literal - ::= L E # external name */ - -static status_t -demangle_expr_primary (dm) - demangling_t dm; -{ - char peek = peek_char (dm); - - DEMANGLE_TRACE ("expr-primary", dm); - - if (peek == 'T') - RETURN_IF_ERROR (demangle_template_param (dm)); - else if (peek == 'L') - { - /* Consume the `L'. */ - advance_char (dm); - peek = peek_char (dm); - - if (peek == '_') - RETURN_IF_ERROR (demangle_mangled_name (dm)); - else - RETURN_IF_ERROR (demangle_literal (dm)); - - RETURN_IF_ERROR (demangle_char (dm, 'E')); - } - else - return STATUS_ERROR; - - return STATUS_OK; -} - -/* Demangles and emits a . Sets *TEMPLATE_P to non-zero - if the substitution is the name of a template, zero otherwise. 
- - ::= S _ - ::= S_ - - ::= St # ::std:: - ::= Sa # ::std::allocator - ::= Sb # ::std::basic_string - ::= Ss # ::std::basic_string, - ::std::allocator > - ::= Si # ::std::basic_istream > - ::= So # ::std::basic_ostream > - ::= Sd # ::std::basic_iostream > -*/ - -static status_t -demangle_substitution (dm, template_p) - demangling_t dm; - int *template_p; -{ - int seq_id; - int peek; - dyn_string_t text; - - DEMANGLE_TRACE ("substitution", dm); - - RETURN_IF_ERROR (demangle_char (dm, 'S')); - - /* Scan the substitution sequence index. A missing number denotes - the first index. */ - peek = peek_char (dm); - if (peek == '_') - seq_id = -1; - /* If the following character is 0-9 or a capital letter, interpret - the sequence up to the next underscore as a base-36 substitution - index. */ - else if (IS_DIGIT ((unsigned char) peek) - || (peek >= 'A' && peek <= 'Z')) - RETURN_IF_ERROR (demangle_number (dm, &seq_id, 36, 0)); - else - { - const char *new_last_source_name = NULL; - - switch (peek) - { - case 't': - RETURN_IF_ERROR (result_add (dm, "std")); - break; - - case 'a': - RETURN_IF_ERROR (result_add (dm, "std::allocator")); - new_last_source_name = "allocator"; - *template_p = 1; - break; - - case 'b': - RETURN_IF_ERROR (result_add (dm, "std::basic_string")); - new_last_source_name = "basic_string"; - *template_p = 1; - break; - - case 's': - if (!flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, "std::string")); - new_last_source_name = "string"; - } - else - { - RETURN_IF_ERROR (result_add (dm, "std::basic_string, std::allocator >")); - new_last_source_name = "basic_string"; - } - *template_p = 0; - break; - - case 'i': - if (!flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, "std::istream")); - new_last_source_name = "istream"; - } - else - { - RETURN_IF_ERROR (result_add (dm, "std::basic_istream >")); - new_last_source_name = "basic_istream"; - } - *template_p = 0; - break; - - case 'o': - if (!flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, 
"std::ostream")); - new_last_source_name = "ostream"; - } - else - { - RETURN_IF_ERROR (result_add (dm, "std::basic_ostream >")); - new_last_source_name = "basic_ostream"; - } - *template_p = 0; - break; - - case 'd': - if (!flag_verbose) - { - RETURN_IF_ERROR (result_add (dm, "std::iostream")); - new_last_source_name = "iostream"; - } - else - { - RETURN_IF_ERROR (result_add (dm, "std::basic_iostream >")); - new_last_source_name = "basic_iostream"; - } - *template_p = 0; - break; - - default: - return "Unrecognized ."; - } - - /* Consume the character we just processed. */ - advance_char (dm); - - if (new_last_source_name != NULL) - { - if (!dyn_string_copy_cstr (dm->last_source_name, - new_last_source_name)) - return STATUS_ALLOCATION_FAILED; - } - - return STATUS_OK; - } - - /* Look up the substitution text. Since `S_' is the most recent - substitution, `S0_' is the second-most-recent, etc., shift the - numbering by one. */ - text = substitution_get (dm, seq_id + 1, template_p); - if (text == NULL) - return "Substitution number out of range."; - - /* Emit the substitution text. */ - RETURN_IF_ERROR (result_add_string (dm, text)); - - RETURN_IF_ERROR (demangle_char (dm, '_')); - return STATUS_OK; -} - -/* Demangles and emits a . - - := Z E [] - := Z E s [] */ - -static status_t -demangle_local_name (dm) - demangling_t dm; -{ - DEMANGLE_TRACE ("local-name", dm); - - RETURN_IF_ERROR (demangle_char (dm, 'Z')); - RETURN_IF_ERROR (demangle_encoding (dm)); - RETURN_IF_ERROR (demangle_char (dm, 'E')); - RETURN_IF_ERROR (result_add (dm, "::")); - - if (peek_char (dm) == 's') - { - /* Local character string literal. */ - RETURN_IF_ERROR (result_add (dm, "string literal")); - /* Consume the s. */ - advance_char (dm); - RETURN_IF_ERROR (demangle_discriminator (dm, 0)); - } - else - { - int unused; - /* Local name for some other entity. Demangle its name. 
*/ - RETURN_IF_ERROR (demangle_name (dm, &unused)); - RETURN_IF_ERROR (demangle_discriminator (dm, 1)); - } - - return STATUS_OK; - } - - /* Optimonally demangles and emits a . If there is no - at the current position in the mangled string, the - descriminator is assumed to be zero. Emit the discriminator number - in parentheses, unless SUPPRESS_FIRST is non-zero and the - discriminator is zero. - - ::= _ */ - -static status_t -demangle_discriminator (dm, suppress_first) - demangling_t dm; - int suppress_first; -{ - /* Output for s to the demangled name is completely - suppressed if not in verbose mode. */ - - if (peek_char (dm) == '_') - { - /* Consume the underscore. */ - advance_char (dm); - if (flag_verbose) - RETURN_IF_ERROR (result_add (dm, " [#")); - /* Check if there's a number following the underscore. */ - if (IS_DIGIT ((unsigned char) peek_char (dm))) - { - int discriminator; - /* Demangle the number. */ - RETURN_IF_ERROR (demangle_number (dm, &discriminator, 10, 0)); - if (flag_verbose) - /* Write the discriminator. The mangled number is two - less than the discriminator ordinal, counting from - zero. */ - RETURN_IF_ERROR (int_to_dyn_string (discriminator + 1, - (dyn_string_t) dm->result)); - } - else - return STATUS_ERROR; - if (flag_verbose) - RETURN_IF_ERROR (result_add_char (dm, ']')); - } - else if (!suppress_first) - { - if (flag_verbose) - RETURN_IF_ERROR (result_add (dm, " [#0]")); - } - - return STATUS_OK; -} - -/* Demangle NAME into RESULT, which must be an initialized - dyn_string_t. On success, returns STATUS_OK. On failure, returns - an error message, and the contents of RESULT are unchanged. 
*/ - -static status_t -cp_demangle (name, result, style) - const char *name; - dyn_string_t result; - int style; -{ - status_t status; - int length = VG_(strlen) (name); - - if (length > 2 && name[0] == '_' && name[1] == 'Z') - { - demangling_t dm = demangling_new (name, style); - if (dm == NULL) - return STATUS_ALLOCATION_FAILED; - - status = result_push (dm); - if (status != STATUS_OK) - { - demangling_delete (dm); - return status; - } - - status = demangle_mangled_name (dm); - if (STATUS_NO_ERROR (status)) - { - dyn_string_t demangled = (dyn_string_t) result_pop (dm); - if (!dyn_string_copy (result, demangled)) - { - demangling_delete (dm); - return STATUS_ALLOCATION_FAILED; - } - dyn_string_delete (demangled); - } - - demangling_delete (dm); - } - else - { - /* It's evidently not a mangled C++ name. It could be the name - of something with C linkage, though, so just copy NAME into - RESULT. */ - if (!dyn_string_copy_cstr (result, name)) - return STATUS_ALLOCATION_FAILED; - status = STATUS_OK; - } - - return status; -} - -/* Demangle TYPE_NAME into RESULT, which must be an initialized - dyn_string_t. On success, returns STATUS_OK. On failiure, returns - an error message, and the contents of RESULT are unchanged. */ - -#ifdef IN_LIBGCC2 -static status_t -cp_demangle_type (type_name, result) - const char* type_name; - dyn_string_t result; -{ - status_t status; - demangling_t dm = demangling_new (type_name); - - if (dm == NULL) - return STATUS_ALLOCATION_FAILED; - - /* Demangle the type name. The demangled name is stored in dm. */ - status = result_push (dm); - if (status != STATUS_OK) - { - demangling_delete (dm); - return status; - } - - status = demangle_type (dm); - - if (STATUS_NO_ERROR (status)) - { - /* The demangling succeeded. Pop the result out of dm and copy - it into RESULT. 
*/ - dyn_string_t demangled = (dyn_string_t) result_pop (dm); - if (!dyn_string_copy (result, demangled)) - return STATUS_ALLOCATION_FAILED; - dyn_string_delete (demangled); - } - - /* Clean up. */ - demangling_delete (dm); - - return status; -} - -extern char *__cxa_demangle PARAMS ((const char *, char *, size_t *, int *)); - -/* ia64 ABI-mandated entry point in the C++ runtime library for performing - demangling. MANGLED_NAME is a NUL-terminated character string - containing the name to be demangled. - - OUTPUT_BUFFER is a region of memory, allocated with malloc, of - *LENGTH bytes, into which the demangled name is stored. If - OUTPUT_BUFFER is not long enough, it is expanded using realloc. - OUTPUT_BUFFER may instead be NULL; in that case, the demangled name - is placed in a region of memory allocated with malloc. - - If LENGTH is non-NULL, the length of the buffer conaining the - demangled name, is placed in *LENGTH. - - The return value is a pointer to the start of the NUL-terminated - demangled name, or NULL if the demangling fails. The caller is - responsible for deallocating this memory using free. - - *STATUS is set to one of the following values: - 0: The demangling operation succeeded. - -1: A memory allocation failiure occurred. - -2: MANGLED_NAME is not a valid name under the C++ ABI mangling rules. - -3: One of the arguments is invalid. - - The demagling is performed using the C++ ABI mangling rules, with - GNU extensions. */ - -char * -__cxa_demangle (mangled_name, output_buffer, length, status) - const char *mangled_name; - char *output_buffer; - size_t *length; - int *status; -{ - struct dyn_string demangled_name; - status_t result; - - if (status == NULL) - return NULL; - - if (mangled_name == NULL) { - *status = -3; - return NULL; - } - - /* Did the caller provide a buffer for the demangled name? */ - if (output_buffer == NULL) { - /* No; dyn_string will malloc a buffer for us. 
*/ - if (!dyn_string_init (&demangled_name, 0)) - { - *status = -1; - return NULL; - } - } - else { - /* Yes. Check that the length was provided. */ - if (length == NULL) { - *status = -3; - return NULL; - } - /* Install the buffer into a dyn_string. */ - demangled_name.allocated = *length; - demangled_name.length = 0; - demangled_name.s = output_buffer; - } - - if (mangled_name[0] == '_' && mangled_name[1] == 'Z') - /* MANGLED_NAME apprears to be a function or variable name. - Demangle it accordingly. */ - result = cp_demangle (mangled_name, &demangled_name, 0); - else - /* Try to demangled MANGLED_NAME as the name of a type. */ - result = cp_demangle_type (mangled_name, &demangled_name); - - if (result == STATUS_OK) - /* The demangling succeeded. */ - { - /* If LENGTH isn't NULL, store the allocated buffer length - there; the buffer may have been realloced by dyn_string - functions. */ - if (length != NULL) - *length = demangled_name.allocated; - /* The operation was a success. */ - *status = 0; - return dyn_string_buf (&demangled_name); - } - else if (result == STATUS_ALLOCATION_FAILED) - /* A call to malloc or realloc failed during the demangling - operation. */ - { - *status = -1; - return NULL; - } - else - /* The demangling failed for another reason, most probably because - MANGLED_NAME isn't a valid mangled name. */ - { - /* If the buffer containing the demangled name wasn't provided - by the caller, free it. */ - if (output_buffer == NULL) - free (dyn_string_buf (&demangled_name)); - *status = -2; - return NULL; - } -} - -#else /* !IN_LIBGCC2 */ - -/* Variant entry point for integration with the existing cplus-dem - demangler. Attempts to demangle MANGLED. If the demangling - succeeds, returns a buffer, allocated with malloc, containing the - demangled name. The caller must deallocate the buffer using free. - If the demangling failes, returns NULL. 
*/ - -char * -VG_(cplus_demangle_v3) (mangled) - const char* mangled; -{ - dyn_string_t demangled; - status_t status; - - /* If this isn't a mangled name, don't pretend to demangle it. */ - if (VG_(strncmp) (mangled, "_Z", 2) != 0) - return NULL; - - /* Create a dyn_string to hold the demangled name. */ - demangled = dyn_string_new (0); - /* Attempt the demangling. */ - status = cp_demangle ((char *) mangled, demangled, 0); - - if (STATUS_NO_ERROR (status)) - /* Demangling succeeded. */ - { - /* Grab the demangled result from the dyn_string. It was - allocated with malloc, so we can return it directly. */ - char *return_value = dyn_string_release (demangled); - /* Hand back the demangled name. */ - return return_value; - } - else if (status == STATUS_ALLOCATION_FAILED) - { - vg_assert (0); - /* - fprintf (stderr, "Memory allocation failed.\n"); - abort (); - */ - } - else - /* Demangling failed. */ - { - dyn_string_delete (demangled); - return NULL; - } -} - -/* Demangle a Java symbol. Java uses a subset of the V3 ABI C++ mangling - conventions, but the output formatting is a little different. - This instructs the C++ demangler not to emit pointer characters ("*"), and - to use Java's namespace separator symbol ("." instead of "::"). It then - does an additional pass over the demangled output to replace instances - of JArray with TYPE[]. */ - -char * -VG_(java_demangle_v3) (mangled) - const char* mangled; -{ - dyn_string_t demangled; - char *next; - char *end; - int len; - status_t status; - int nesting = 0; - char *cplus_demangled; - char *return_value; - - /* Create a dyn_string to hold the demangled name. */ - demangled = dyn_string_new (0); - - /* Attempt the demangling. */ - status = cp_demangle ((char *) mangled, demangled, DMGL_JAVA); - - if (STATUS_NO_ERROR (status)) - /* Demangling succeeded. */ - { - /* Grab the demangled result from the dyn_string. 
*/ - cplus_demangled = dyn_string_release (demangled); - } - else if (status == STATUS_ALLOCATION_FAILED) - { - vg_assert (0); - /* - fprintf (stderr, "Memory allocation failed.\n"); - abort (); - */ - } - else - /* Demangling failed. */ - { - dyn_string_delete (demangled); - return NULL; - } - - len = VG_(strlen) (cplus_demangled); - next = cplus_demangled; - end = next + len; - demangled = NULL; - - /* Replace occurances of JArray with TYPE[]. */ - while (next < end) - { - char *open_str = VG_(strstr) (next, "JArray<"); - char *close_str = NULL; - if (nesting > 0) - close_str = VG_(strchr) (next, '>'); - - if (open_str != NULL && (close_str == NULL || close_str > open_str)) - { - ++nesting; - - if (!demangled) - demangled = dyn_string_new(len); - - /* Copy prepending symbols, if any. */ - if (open_str > next) - { - open_str[0] = 0; - dyn_string_append_cstr (demangled, next); - } - next = open_str + 7; - } - else if (close_str != NULL) - { - --nesting; - - /* Copy prepending type symbol, if any. Squash any spurious - whitespace. */ - if (close_str > next && next[0] != ' ') - { - close_str[0] = 0; - dyn_string_append_cstr (demangled, next); - } - dyn_string_append_cstr (demangled, "[]"); - next = close_str + 1; - } - else - { - /* There are no more arrays. Copy the rest of the symbol, or - simply return the original symbol if no changes were made. */ - if (next == cplus_demangled) - return cplus_demangled; - - dyn_string_append_cstr (demangled, next); - next = end; - } - } - - free (cplus_demangled); - - return_value = dyn_string_release (demangled); - return return_value; -} - -#endif /* IN_LIBGCC2 */ - - -/* Demangle NAME in the G++ V3 ABI demangling style, and return either - zero, indicating that some error occurred, or a demangling_t - holding the results. 
*/ -static demangling_t -demangle_v3_with_details (name) - const char *name; -{ - demangling_t dm; - status_t status; - - if (VG_(strncmp) (name, "_Z", 2)) - return 0; - - dm = demangling_new (name, DMGL_GNU_V3); - if (dm == NULL) - { - vg_assert (0); - /* - fprintf (stderr, "Memory allocation failed.\n"); - abort (); - */ - } - - status = result_push (dm); - if (! STATUS_NO_ERROR (status)) - { - demangling_delete (dm); - vg_assert (0); - /* - fprintf (stderr, "%s\n", status); - abort (); - */ - } - - status = demangle_mangled_name (dm); - if (STATUS_NO_ERROR (status)) - return dm; - - demangling_delete (dm); - return 0; -} - - -/* Return non-zero iff NAME is the mangled form of a constructor name - in the G++ V3 ABI demangling style. Specifically, return: - - '1' if NAME is a complete object constructor, - - '2' if NAME is a base object constructor, or - - '3' if NAME is a complete object allocating constructor. */ -/* -enum gnu_v3_ctor_kinds -is_gnu_v3_mangled_ctor (name) - const char *name; -{ - demangling_t dm = demangle_v3_with_details (name); - - if (dm) - { - enum gnu_v3_ctor_kinds result = dm->is_constructor; - demangling_delete (dm); - return result; - } - else - return 0; -} -*/ - - -/* Return non-zero iff NAME is the mangled form of a destructor name - in the G++ V3 ABI demangling style. Specifically, return: - - '0' if NAME is a deleting destructor, - - '1' if NAME is a complete object destructor, or - - '2' if NAME is a base object destructor. */ -/* -enum gnu_v3_dtor_kinds -is_gnu_v3_mangled_dtor (name) - const char *name; -{ - demangling_t dm = demangle_v3_with_details (name); - - if (dm) - { - enum gnu_v3_dtor_kinds result = dm->is_destructor; - demangling_delete (dm); - return result; - } - else - return 0; -} -*/ - -#ifdef STANDALONE_DEMANGLER - -#include "getopt.h" - -static void print_usage - PARAMS ((FILE* fp, int exit_value)); - -/* Non-zero if CHAR is a character than can occur in a mangled name. 
*/ -#define is_mangled_char(CHAR) \ - (IS_ALPHA (CHAR) || IS_DIGIT (CHAR) \ - || (CHAR) == '_' || (CHAR) == '.' || (CHAR) == '$') - -/* The name of this program, as invoked. */ -const char* program_name; - -/* Prints usage summary to FP and then exits with EXIT_VALUE. */ - -static void -print_usage (fp, exit_value) - FILE* fp; - int exit_value; -{ - fprintf (fp, "Usage: %s [options] [names ...]\n", program_name); - fprintf (fp, "Options:\n"); - fprintf (fp, " -h,--help Display this message.\n"); - fprintf (fp, " -s,--strict Demangle standard names only.\n"); - fprintf (fp, " -v,--verbose Produce verbose demanglings.\n"); - fprintf (fp, "If names are provided, they are demangled. Otherwise filters standard input.\n"); - - exit (exit_value); -} - -/* Option specification for getopt_long. */ -static const struct option long_options[] = -{ - { "help", no_argument, NULL, 'h' }, - { "strict", no_argument, NULL, 's' }, - { "verbose", no_argument, NULL, 'v' }, - { NULL, no_argument, NULL, 0 }, -}; - -/* Main entry for a demangling filter executable. It will demangle - its command line arguments, if any. If none are provided, it will - filter stdin to stdout, replacing any recognized mangled C++ names - with their demangled equivalents. */ - -int -main (argc, argv) - int argc; - char *argv[]; -{ - status_t status; - int i; - int opt_char; - - /* Use the program name of this program, as invoked. */ - program_name = argv[0]; - - /* Parse options. */ - do - { - opt_char = getopt_long (argc, argv, "hsv", long_options, NULL); - switch (opt_char) - { - case '?': /* Unrecognized option. */ - print_usage (stderr, 1); - break; - - case 'h': - print_usage (stdout, 0); - break; - - case 's': - flag_strict = 1; - break; - - case 'v': - flag_verbose = 1; - break; - } - } - while (opt_char != -1); - - if (optind == argc) - /* No command line arguments were provided. Filter stdin. 
*/ - { - dyn_string_t mangled = dyn_string_new (3); - dyn_string_t demangled = dyn_string_new (0); - status_t status; - - /* Read all of input. */ - while (!feof (stdin)) - { - char c = getchar (); - - /* The first character of a mangled name is an underscore. */ - if (feof (stdin)) - break; - if (c != '_') - { - /* It's not a mangled name. Print the character and go - on. */ - putchar (c); - continue; - } - c = getchar (); - - /* The second character of a mangled name is a capital `Z'. */ - if (feof (stdin)) - break; - if (c != 'Z') - { - /* It's not a mangled name. Print the previous - underscore, the `Z', and go on. */ - putchar ('_'); - putchar (c); - continue; - } - - /* Start keeping track of the candidate mangled name. */ - dyn_string_append_char (mangled, '_'); - dyn_string_append_char (mangled, 'Z'); - - /* Pile characters into mangled until we hit one that can't - occur in a mangled name. */ - c = getchar (); - while (!feof (stdin) && is_mangled_char (c)) - { - dyn_string_append_char (mangled, c); - if (feof (stdin)) - break; - c = getchar (); - } - - /* Attempt to demangle the name. */ - status = cp_demangle (dyn_string_buf (mangled), demangled, 0); - - /* If the demangling succeeded, great! Print out the - demangled version. */ - if (STATUS_NO_ERROR (status)) - fputs (dyn_string_buf (demangled), stdout); - /* Abort on allocation failures. */ - else if (status == STATUS_ALLOCATION_FAILED) - { - fprintf (stderr, "Memory allocation failed.\n"); - abort (); - } - /* Otherwise, it might not have been a mangled name. Just - print out the original text. */ - else - fputs (dyn_string_buf (mangled), stdout); - - /* If we haven't hit EOF yet, we've read one character that - can't occur in a mangled name, so print it out. */ - if (!feof (stdin)) - putchar (c); - - /* Clear the candidate mangled name, to start afresh next - time we hit a `_Z'. 
*/ - dyn_string_clear (mangled); - } - - dyn_string_delete (mangled); - dyn_string_delete (demangled); - } - else - /* Demangle command line arguments. */ - { - dyn_string_t result = dyn_string_new (0); - - /* Loop over command line arguments. */ - for (i = optind; i < argc; ++i) - { - /* Attempt to demangle. */ - status = cp_demangle (argv[i], result, 0); - - /* If it worked, print the demangled name. */ - if (STATUS_NO_ERROR (status)) - printf ("%s\n", dyn_string_buf (result)); - /* Abort on allocaiton failures. */ - else if (status == STATUS_ALLOCATION_FAILED) - { - fprintf (stderr, "Memory allocation failed.\n"); - abort (); - } - /* If not, print the error message to stderr instead. */ - else - fprintf (stderr, "%s\n", status); - } - dyn_string_delete (result); - } - - return 0; -} - -#endif /* STANDALONE_DEMANGLER */ diff --git a/coregrind/demangle/cplus-dem.c b/coregrind/demangle/cplus-dem.c deleted file mode 100644 index 56c3261391..0000000000 --- a/coregrind/demangle/cplus-dem.c +++ /dev/null @@ -1,5264 +0,0 @@ -/* Demangler for GNU C++ - Copyright 1989, 1991, 1994, 1995, 1996, 1997, 1998, 1999, - 2000, 2001 Free Software Foundation, Inc. - Written by James Clark (jjc@jclark.uucp) - Rewritten by Fred Fish (fnf@cygnus.com) for ARM and Lucid demangling - Modified by Satish Pai (pai@apollo.hp.com) for HP demangling - -This file is part of the libiberty library. -Libiberty is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public -License as published by the Free Software Foundation; either -version 2 of the License, or (at your option) any later version. - -Libiberty is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. 
- -You should have received a copy of the GNU Library General Public -License along with libiberty; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -/* This file exports two functions; cplus_mangle_opname and cplus_demangle. - - This file imports xmalloc and xrealloc, which are like malloc and - realloc except that they generate a fatal error if there is no - available memory. */ - -/* This file lives in both GCC and libiberty. When making changes, please - try not to break either. */ - -#define __NO_STRING_INLINES - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#include "safe-ctype.h" -#include "vg_include.h" - -#include -#include -#include - -#ifdef HAVE_STDLIB_H -#include -#else -char * malloc (); -char * realloc (); -#endif - -#include -#include "dyn-string.h" -#undef CURRENT_DEMANGLING_STYLE -#define CURRENT_DEMANGLING_STYLE work->options - -/*#include "libiberty.h"*/ - -static char *ada_demangle PARAMS ((const char *, int)); - -#define min(X,Y) (((X) < (Y)) ? (X) : (Y)) - -/* A value at least one greater than the maximum number of characters - that will be output when using the `%d' format with `printf'. 
*/ -#define INTBUF_SIZE 32 - -#ifndef ARRAY_SIZE -#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0])) -#endif - -#ifndef STANDALONE -#define xstrdup(ptr) VG_(strdup)(VG_AR_DEMANGLE, ptr) -#define free(ptr) VG_(free)(VG_AR_DEMANGLE, ptr) -#define xmalloc(size) VG_(malloc)(VG_AR_DEMANGLE, size) -#define xrealloc(ptr, size) VG_(realloc)(VG_AR_DEMANGLE, ptr, size) -#define abort() vg_assert(0) -#undef strstr -#define strstr VG_(strstr) -#define sprintf VG_(sprintf) -#define strncpy VG_(strncpy) -#define strncat VG_(strncat) -#define strchr VG_(strchr) -#define strpbrk VG_(strpbrk) -#endif - -extern void fancy_abort PARAMS ((void)) ATTRIBUTE_NORETURN; - -/* In order to allow a single demangler executable to demangle strings - using various common values of CPLUS_MARKER, as well as any specific - one set at compile time, we maintain a string containing all the - commonly used ones, and check to see if the marker we are looking for - is in that string. CPLUS_MARKER is usually '$' on systems where the - assembler can deal with that. Where the assembler can't, it's usually - '.' (but on many systems '.' is used for other things). We put the - current defined CPLUS_MARKER first (which defaults to '$'), followed - by the next most common value, followed by an explicit '$' in case - the value of CPLUS_MARKER is not '$'. - - We could avoid this if we could just get g++ to tell us what the actual - cplus marker character is as part of the debug information, perhaps by - ensuring that it is the character that terminates the gcc_compiled - marker symbol (FIXME). 
*/ - -#if !defined (CPLUS_MARKER) -#define CPLUS_MARKER '$' -#endif - -enum demangling_styles current_demangling_style = auto_demangling; - -static char cplus_markers[] = { CPLUS_MARKER, '.', '$', '\0' }; - -static char char_str[2] = { '\000', '\000' }; - -/* -void -set_cplus_marker_for_demangling (ch) - int ch; -{ - cplus_markers[0] = ch; -} -*/ - -typedef struct string /* Beware: these aren't required to be */ -{ /* '\0' terminated. */ - char *b; /* pointer to start of string */ - char *p; /* pointer after last character */ - char *e; /* pointer after end of allocated space */ -} string; - -/* Stuff that is shared between sub-routines. - Using a shared structure allows cplus_demangle to be reentrant. */ - -struct work_stuff -{ - int options; - char **typevec; - char **ktypevec; - char **btypevec; - int numk; - int numb; - int ksize; - int bsize; - int ntypes; - int typevec_size; - int constructor; - int destructor; - int static_type; /* A static member function */ - int temp_start; /* index in demangled to start of template args */ - int type_quals; /* The type qualifiers. */ - int dllimported; /* Symbol imported from a PE DLL */ - char **tmpl_argvec; /* Template function arguments. */ - int ntmpl_args; /* The number of template function arguments. */ - int forgetting_types; /* Nonzero if we are not remembering the types - we see. */ - string* previous_argument; /* The last function argument demangled. */ - int nrepeats; /* The number of times to repeat the previous - argument. 
*/ -}; - -#define PRINT_ANSI_QUALIFIERS (work -> options & DMGL_ANSI) -#define PRINT_ARG_TYPES (work -> options & DMGL_PARAMS) - -static const struct optable -{ - const char *const in; - const char *const out; - const int flags; -} optable[] = { - {"nw", " new", DMGL_ANSI}, /* new (1.92, ansi) */ - {"dl", " delete", DMGL_ANSI}, /* new (1.92, ansi) */ - {"new", " new", 0}, /* old (1.91, and 1.x) */ - {"delete", " delete", 0}, /* old (1.91, and 1.x) */ - {"vn", " new []", DMGL_ANSI}, /* GNU, pending ansi */ - {"vd", " delete []", DMGL_ANSI}, /* GNU, pending ansi */ - {"as", "=", DMGL_ANSI}, /* ansi */ - {"ne", "!=", DMGL_ANSI}, /* old, ansi */ - {"eq", "==", DMGL_ANSI}, /* old, ansi */ - {"ge", ">=", DMGL_ANSI}, /* old, ansi */ - {"gt", ">", DMGL_ANSI}, /* old, ansi */ - {"le", "<=", DMGL_ANSI}, /* old, ansi */ - {"lt", "<", DMGL_ANSI}, /* old, ansi */ - {"plus", "+", 0}, /* old */ - {"pl", "+", DMGL_ANSI}, /* ansi */ - {"apl", "+=", DMGL_ANSI}, /* ansi */ - {"minus", "-", 0}, /* old */ - {"mi", "-", DMGL_ANSI}, /* ansi */ - {"ami", "-=", DMGL_ANSI}, /* ansi */ - {"mult", "*", 0}, /* old */ - {"ml", "*", DMGL_ANSI}, /* ansi */ - {"amu", "*=", DMGL_ANSI}, /* ansi (ARM/Lucid) */ - {"aml", "*=", DMGL_ANSI}, /* ansi (GNU/g++) */ - {"convert", "+", 0}, /* old (unary +) */ - {"negate", "-", 0}, /* old (unary -) */ - {"trunc_mod", "%", 0}, /* old */ - {"md", "%", DMGL_ANSI}, /* ansi */ - {"amd", "%=", DMGL_ANSI}, /* ansi */ - {"trunc_div", "/", 0}, /* old */ - {"dv", "/", DMGL_ANSI}, /* ansi */ - {"adv", "/=", DMGL_ANSI}, /* ansi */ - {"truth_andif", "&&", 0}, /* old */ - {"aa", "&&", DMGL_ANSI}, /* ansi */ - {"truth_orif", "||", 0}, /* old */ - {"oo", "||", DMGL_ANSI}, /* ansi */ - {"truth_not", "!", 0}, /* old */ - {"nt", "!", DMGL_ANSI}, /* ansi */ - {"postincrement","++", 0}, /* old */ - {"pp", "++", DMGL_ANSI}, /* ansi */ - {"postdecrement","--", 0}, /* old */ - {"mm", "--", DMGL_ANSI}, /* ansi */ - {"bit_ior", "|", 0}, /* old */ - {"or", "|", DMGL_ANSI}, /* ansi */ - 
{"aor", "|=", DMGL_ANSI}, /* ansi */ - {"bit_xor", "^", 0}, /* old */ - {"er", "^", DMGL_ANSI}, /* ansi */ - {"aer", "^=", DMGL_ANSI}, /* ansi */ - {"bit_and", "&", 0}, /* old */ - {"ad", "&", DMGL_ANSI}, /* ansi */ - {"aad", "&=", DMGL_ANSI}, /* ansi */ - {"bit_not", "~", 0}, /* old */ - {"co", "~", DMGL_ANSI}, /* ansi */ - {"call", "()", 0}, /* old */ - {"cl", "()", DMGL_ANSI}, /* ansi */ - {"alshift", "<<", 0}, /* old */ - {"ls", "<<", DMGL_ANSI}, /* ansi */ - {"als", "<<=", DMGL_ANSI}, /* ansi */ - {"arshift", ">>", 0}, /* old */ - {"rs", ">>", DMGL_ANSI}, /* ansi */ - {"ars", ">>=", DMGL_ANSI}, /* ansi */ - {"component", "->", 0}, /* old */ - {"pt", "->", DMGL_ANSI}, /* ansi; Lucid C++ form */ - {"rf", "->", DMGL_ANSI}, /* ansi; ARM/GNU form */ - {"indirect", "*", 0}, /* old */ - {"method_call", "->()", 0}, /* old */ - {"addr", "&", 0}, /* old (unary &) */ - {"array", "[]", 0}, /* old */ - {"vc", "[]", DMGL_ANSI}, /* ansi */ - {"compound", ", ", 0}, /* old */ - {"cm", ", ", DMGL_ANSI}, /* ansi */ - {"cond", "?:", 0}, /* old */ - {"cn", "?:", DMGL_ANSI}, /* pseudo-ansi */ - {"max", ">?", 0}, /* old */ - {"mx", ">?", DMGL_ANSI}, /* pseudo-ansi */ - {"min", "*", DMGL_ANSI}, /* ansi */ - {"sz", "sizeof ", DMGL_ANSI} /* pseudo-ansi */ -}; - -/* These values are used to indicate the various type varieties. - They are all non-zero so that they can be used as `success' - values. 
*/ -typedef enum type_kind_t -{ - tk_none, - tk_pointer, - tk_reference, - tk_integral, - tk_bool, - tk_char, - tk_real -} type_kind_t; - -const struct demangler_engine libiberty_demanglers[] = -{ - { - NO_DEMANGLING_STYLE_STRING, - no_demangling, - "Demangling disabled" - } - , - { - AUTO_DEMANGLING_STYLE_STRING, - auto_demangling, - "Automatic selection based on executable" - } - , - { - GNU_DEMANGLING_STYLE_STRING, - gnu_demangling, - "GNU (g++) style demangling" - } - , - { - LUCID_DEMANGLING_STYLE_STRING, - lucid_demangling, - "Lucid (lcc) style demangling" - } - , - { - ARM_DEMANGLING_STYLE_STRING, - arm_demangling, - "ARM style demangling" - } - , - { - HP_DEMANGLING_STYLE_STRING, - hp_demangling, - "HP (aCC) style demangling" - } - , - { - EDG_DEMANGLING_STYLE_STRING, - edg_demangling, - "EDG style demangling" - } - , - { - GNU_V3_DEMANGLING_STYLE_STRING, - gnu_v3_demangling, - "GNU (g++) V3 ABI-style demangling" - } - , - { - JAVA_DEMANGLING_STYLE_STRING, - java_demangling, - "Java style demangling" - } - , - { - GNAT_DEMANGLING_STYLE_STRING, - gnat_demangling, - "GNAT style demangling" - } - , - { - NULL, unknown_demangling, NULL - } -}; - -#define STRING_EMPTY(str) ((str) -> b == (str) -> p) -#define PREPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ - string_prepend(str, " ");} -#define APPEND_BLANK(str) {if (!STRING_EMPTY(str)) \ - string_append(str, " ");} -#define LEN_STRING(str) ( (STRING_EMPTY(str))?0:((str)->p - (str)->b)) - -/* The scope separator appropriate for the language being demangled. */ - -#define SCOPE_STRING(work) ((work->options & DMGL_JAVA) ? "." 
: "::") - -#define ARM_VTABLE_STRING "__vtbl__" /* Lucid/ARM virtual table prefix */ -#define ARM_VTABLE_STRLEN 8 /* strlen (ARM_VTABLE_STRING) */ - -/* Prototypes for local functions */ - -static void -delete_work_stuff PARAMS ((struct work_stuff *)); - -static void -delete_non_B_K_work_stuff PARAMS ((struct work_stuff *)); - -static char * -mop_up PARAMS ((struct work_stuff *, string *, int)); - -static void -squangle_mop_up PARAMS ((struct work_stuff *)); - -static void -work_stuff_copy_to_from PARAMS ((struct work_stuff *, struct work_stuff *)); - -#if 0 -static int -demangle_method_args PARAMS ((struct work_stuff *, const char **, string *)); -#endif - -static char * -internal_cplus_demangle PARAMS ((struct work_stuff *, const char *)); - -static int -demangle_template_template_parm PARAMS ((struct work_stuff *work, - const char **, string *)); - -static int -demangle_template PARAMS ((struct work_stuff *work, const char **, string *, - string *, int, int)); - -static int -arm_pt PARAMS ((struct work_stuff *, const char *, int, const char **, - const char **)); - -static int -demangle_class_name PARAMS ((struct work_stuff *, const char **, string *)); - -static int -demangle_qualified PARAMS ((struct work_stuff *, const char **, string *, - int, int)); - -static int -demangle_class PARAMS ((struct work_stuff *, const char **, string *)); - -static int -demangle_fund_type PARAMS ((struct work_stuff *, const char **, string *)); - -static int -demangle_signature PARAMS ((struct work_stuff *, const char **, string *)); - -static int -demangle_prefix PARAMS ((struct work_stuff *, const char **, string *)); - -static int -gnu_special PARAMS ((struct work_stuff *, const char **, string *)); - -static int -arm_special PARAMS ((const char **, string *)); - -static void -string_need PARAMS ((string *, int)); - -static void -string_delete PARAMS ((string *)); - -static void -string_init PARAMS ((string *)); - -static void -string_clear PARAMS ((string *)); - -#if 0 
-static int -string_empty PARAMS ((string *)); -#endif - -static void -string_append PARAMS ((string *, const char *)); - -static void -string_appends PARAMS ((string *, string *)); - -static void -string_appendn PARAMS ((string *, const char *, int)); - -static void -string_prepend PARAMS ((string *, const char *)); - -static void -string_prependn PARAMS ((string *, const char *, int)); - -static void -string_append_template_idx PARAMS ((string *, int)); - -static int -get_count PARAMS ((const char **, int *)); - -static int -consume_count PARAMS ((const char **)); - -static int -consume_count_with_underscores PARAMS ((const char**)); - -static int -demangle_args PARAMS ((struct work_stuff *, const char **, string *)); - -static int -demangle_nested_args PARAMS ((struct work_stuff*, const char**, string*)); - -static int -do_type PARAMS ((struct work_stuff *, const char **, string *)); - -static int -do_arg PARAMS ((struct work_stuff *, const char **, string *)); - -static void -demangle_function_name PARAMS ((struct work_stuff *, const char **, string *, - const char *)); - -static int -iterate_demangle_function PARAMS ((struct work_stuff *, - const char **, string *, const char *)); - -static void -remember_type PARAMS ((struct work_stuff *, const char *, int)); - -static void -remember_Btype PARAMS ((struct work_stuff *, const char *, int, int)); - -static int -register_Btype PARAMS ((struct work_stuff *)); - -static void -remember_Ktype PARAMS ((struct work_stuff *, const char *, int)); - -static void -forget_types PARAMS ((struct work_stuff *)); - -static void -forget_B_and_K_types PARAMS ((struct work_stuff *)); - -static void -string_prepends PARAMS ((string *, string *)); - -static int -demangle_template_value_parm PARAMS ((struct work_stuff*, const char**, - string*, type_kind_t)); - -static int -do_hpacc_template_const_value PARAMS ((struct work_stuff *, const char **, string *)); - -static int -do_hpacc_template_literal PARAMS ((struct work_stuff *, 
const char **, string *)); - -static int -snarf_numeric_literal PARAMS ((const char **, string *)); - -/* There is a TYPE_QUAL value for each type qualifier. They can be - combined by bitwise-or to form the complete set of qualifiers for a - type. */ - -#define TYPE_UNQUALIFIED 0x0 -#define TYPE_QUAL_CONST 0x1 -#define TYPE_QUAL_VOLATILE 0x2 -#define TYPE_QUAL_RESTRICT 0x4 - -static int -code_for_qualifier PARAMS ((int)); - -static const char* -qualifier_string PARAMS ((int)); - -static const char* -demangle_qualifier PARAMS ((int)); - -static int -demangle_expression PARAMS ((struct work_stuff *, const char **, string *, - type_kind_t)); - -static int -demangle_integral_value PARAMS ((struct work_stuff *, const char **, - string *)); - -static int -demangle_real_value PARAMS ((struct work_stuff *, const char **, string *)); - -static void -demangle_arm_hp_template PARAMS ((struct work_stuff *, const char **, int, - string *)); - -static void -recursively_demangle PARAMS ((struct work_stuff *, const char **, string *, - int)); - -static void -grow_vect PARAMS ((void **, size_t *, size_t, int)); - -/* Translate count to integer, consuming tokens in the process. - Conversion terminates on the first non-digit character. - - Trying to consume something that isn't a count results in no - consumption of input and a return of -1. - - Overflow consumes the rest of the digits, and returns -1. */ - -static int -consume_count (type) - const char **type; -{ - int count = 0; - - if (! ISDIGIT ((unsigned char)**type)) - return -1; - - while (ISDIGIT ((unsigned char)**type)) - { - count *= 10; - - /* Check for overflow. - We assume that count is represented using two's-complement; - no power of two is divisible by ten, so if an overflow occurs - when multiplying by ten, the result will not be a multiple of - ten. 
*/ - if ((count % 10) != 0) - { - while (ISDIGIT ((unsigned char) **type)) - (*type)++; - return -1; - } - - count += **type - '0'; - (*type)++; - } - - if (count < 0) - count = -1; - - return (count); -} - - -/* Like consume_count, but for counts that are preceded and followed - by '_' if they are greater than 10. Also, -1 is returned for - failure, since 0 can be a valid value. */ - -static int -consume_count_with_underscores (mangled) - const char **mangled; -{ - int idx; - - if (**mangled == '_') - { - (*mangled)++; - if (!ISDIGIT ((unsigned char)**mangled)) - return -1; - - idx = consume_count (mangled); - if (**mangled != '_') - /* The trailing underscore was missing. */ - return -1; - - (*mangled)++; - } - else - { - if (**mangled < '0' || **mangled > '9') - return -1; - - idx = **mangled - '0'; - (*mangled)++; - } - - return idx; -} - -/* C is the code for a type-qualifier. Return the TYPE_QUAL - corresponding to this qualifier. */ - -static int -code_for_qualifier (c) - int c; -{ - switch (c) - { - case 'C': - return TYPE_QUAL_CONST; - - case 'V': - return TYPE_QUAL_VOLATILE; - - case 'u': - return TYPE_QUAL_RESTRICT; - - default: - break; - } - - /* C was an invalid qualifier. */ - abort (); -} - -/* Return the string corresponding to the qualifiers given by - TYPE_QUALS. 
*/ - -static const char* -qualifier_string (type_quals) - int type_quals; -{ - switch (type_quals) - { - case TYPE_UNQUALIFIED: - return ""; - - case TYPE_QUAL_CONST: - return "const"; - - case TYPE_QUAL_VOLATILE: - return "volatile"; - - case TYPE_QUAL_RESTRICT: - return "__restrict"; - - case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE: - return "const volatile"; - - case TYPE_QUAL_CONST | TYPE_QUAL_RESTRICT: - return "const __restrict"; - - case TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT: - return "volatile __restrict"; - - case TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE | TYPE_QUAL_RESTRICT: - return "const volatile __restrict"; - - default: - break; - } - - /* TYPE_QUALS was an invalid qualifier set. */ - abort (); -} - -/* C is the code for a type-qualifier. Return the string - corresponding to this qualifier. This function should only be - called with a valid qualifier code. */ - -static const char* -demangle_qualifier (c) - int c; -{ - return qualifier_string (code_for_qualifier (c)); -} - -#if 0 -int -cplus_demangle_opname (opname, result, options) - const char *opname; - char *result; - int options; -{ - int len, len1, ret; - string type; - struct work_stuff work[1]; - const char *tem; - - len = strlen(opname); - result[0] = '\0'; - ret = 0; - memset ((char *) work, 0, sizeof (work)); - work->options = options; - - if (opname[0] == '_' && opname[1] == '_' - && opname[2] == 'o' && opname[3] == 'p') - { - /* ANSI. */ - /* type conversion operator. */ - tem = opname + 4; - if (do_type (work, &tem, &type)) - { - strcat (result, "operator "); - strncat (result, type.b, type.p - type.b); - string_delete (&type); - ret = 1; - } - } - else if (opname[0] == '_' && opname[1] == '_' - && ISLOWER((unsigned char)opname[2]) - && ISLOWER((unsigned char)opname[3])) - { - if (opname[4] == '\0') - { - /* Operator. 
*/ - size_t i; - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - if (strlen (optable[i].in) == 2 - && memcmp (optable[i].in, opname + 2, 2) == 0) - { - strcat (result, "operator"); - strcat (result, optable[i].out); - ret = 1; - break; - } - } - } - else - { - if (opname[2] == 'a' && opname[5] == '\0') - { - /* Assignment. */ - size_t i; - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - if (strlen (optable[i].in) == 3 - && memcmp (optable[i].in, opname + 2, 3) == 0) - { - strcat (result, "operator"); - strcat (result, optable[i].out); - ret = 1; - break; - } - } - } - } - } - else if (len >= 3 - && opname[0] == 'o' - && opname[1] == 'p' - && strchr (cplus_markers, opname[2]) != NULL) - { - /* see if it's an assignment expression */ - if (len >= 10 /* op$assign_ */ - && memcmp (opname + 3, "assign_", 7) == 0) - { - size_t i; - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - len1 = len - 10; - if ((int) strlen (optable[i].in) == len1 - && memcmp (optable[i].in, opname + 10, len1) == 0) - { - strcat (result, "operator"); - strcat (result, optable[i].out); - strcat (result, "="); - ret = 1; - break; - } - } - } - else - { - size_t i; - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - len1 = len - 3; - if ((int) strlen (optable[i].in) == len1 - && memcmp (optable[i].in, opname + 3, len1) == 0) - { - strcat (result, "operator"); - strcat (result, optable[i].out); - ret = 1; - break; - } - } - } - } - else if (len >= 5 && memcmp (opname, "type", 4) == 0 - && strchr (cplus_markers, opname[4]) != NULL) - { - /* type conversion operator */ - tem = opname + 5; - if (do_type (work, &tem, &type)) - { - strcat (result, "operator "); - strncat (result, type.b, type.p - type.b); - string_delete (&type); - ret = 1; - } - } - squangle_mop_up (work); - return ret; - -} -#endif /* 0 */ - -/* Takes operator name as e.g. "++" and returns mangled - operator name (e.g. "postincrement_expr"), or NULL if not found. 
- - If OPTIONS & DMGL_ANSI == 1, return the ANSI name; - if OPTIONS & DMGL_ANSI == 0, return the old GNU name. */ - -/* -const char * -cplus_mangle_opname (opname, options) - const char *opname; - int options; -{ - size_t i; - int len; - - len = strlen (opname); - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - if ((int) strlen (optable[i].out) == len - && (options & DMGL_ANSI) == (optable[i].flags & DMGL_ANSI) - && memcmp (optable[i].out, opname, len) == 0) - return optable[i].in; - } - return (0); -} -*/ - -/* Add a routine to set the demangling style to be sure it is valid and - allow for any demangler initialization that maybe necessary. */ - -/* -enum demangling_styles -cplus_demangle_set_style (style) - enum demangling_styles style; -{ - const struct demangler_engine *demangler = libiberty_demanglers; - - for (; demangler->demangling_style != unknown_demangling; ++demangler) - if (style == demangler->demangling_style) - { - current_demangling_style = style; - return current_demangling_style; - } - - return unknown_demangling; -} -*/ - -/* Do string name to style translation */ - -/* -enum demangling_styles -cplus_demangle_name_to_style (name) - const char *name; -{ - const struct demangler_engine *demangler = libiberty_demanglers; - - for (; demangler->demangling_style != unknown_demangling; ++demangler) - if (strcmp (name, demangler->demangling_style_name) == 0) - return demangler->demangling_style; - - return unknown_demangling; -} -*/ - -/* char *cplus_demangle (const char *mangled, int options) - - If MANGLED is a mangled function name produced by GNU C++, then - a pointer to a @code{malloc}ed string giving a C++ representation - of the name will be returned; otherwise NULL will be returned. - It is the caller's responsibility to free the string which - is returned. - - The OPTIONS arg may contain one or more of the following bits: - - DMGL_ANSI ANSI qualifiers such as `const' and `void' are - included. - DMGL_PARAMS Function parameters are included. 
- - For example, - - cplus_demangle ("foo__1Ai", DMGL_PARAMS) => "A::foo(int)" - cplus_demangle ("foo__1Ai", DMGL_PARAMS | DMGL_ANSI) => "A::foo(int)" - cplus_demangle ("foo__1Ai", 0) => "A::foo" - - cplus_demangle ("foo__1Afe", DMGL_PARAMS) => "A::foo(float,...)" - cplus_demangle ("foo__1Afe", DMGL_PARAMS | DMGL_ANSI)=> "A::foo(float,...)" - cplus_demangle ("foo__1Afe", 0) => "A::foo" - - Note that any leading underscores, or other such characters prepended by - the compilation system, are presumed to have already been stripped from - MANGLED. */ - -char * -VG_(cplus_demangle) (mangled, options) - const char *mangled; - int options; -{ - char *ret; - struct work_stuff work[1]; - - if (current_demangling_style == no_demangling) - return xstrdup (mangled); - - memset ((char *) work, 0, sizeof (work)); - work->options = options; - if ((work->options & DMGL_STYLE_MASK) == 0) - work->options |= (int) current_demangling_style & DMGL_STYLE_MASK; - - /* The V3 ABI demangling is implemented elsewhere. */ - if (GNU_V3_DEMANGLING || AUTO_DEMANGLING) - { - ret = VG_(cplus_demangle_v3) (mangled/*, work->options*/); - if (ret || GNU_V3_DEMANGLING) - return ret; - } - - if (JAVA_DEMANGLING) - { - ret = VG_(java_demangle_v3) (mangled); - if (ret) - return ret; - } - - if (GNAT_DEMANGLING) - return ada_demangle(mangled,options); - - ret = internal_cplus_demangle (work, mangled); - squangle_mop_up (work); - return (ret); -} - - -/* Assuming *OLD_VECT points to an array of *SIZE objects of size - ELEMENT_SIZE, grow it to contain at least MIN_SIZE objects, - updating *OLD_VECT and *SIZE as necessary. */ - -static void -grow_vect (old_vect, size, min_size, element_size) - void **old_vect; - size_t *size; - size_t min_size; - int element_size; -{ - if (*size < min_size) - { - *size *= 2; - if (*size < min_size) - *size = min_size; - *old_vect = xrealloc (*old_vect, *size * element_size); - } -} - -/* Demangle ada names: - 1. Discard final __{DIGIT}+ or ${DIGIT}+ - 2. 
Convert other instances of embedded "__" to `.'. - 3. Discard leading _ada_. - 4. Remove everything after first ___ if it is followed by 'X'. - 5. Put symbols that should be suppressed in <...> brackets. - The resulting string is valid until the next call of ada_demangle. */ - -static char * -ada_demangle (mangled, option) - const char *mangled; - int option ATTRIBUTE_UNUSED; -{ - int i, j; - int len0; - const char* p; - char *demangled = NULL; - int at_start_name; - int changed; - char *demangling_buffer = NULL; - size_t demangling_buffer_size = 0; - - changed = 0; - - if (strncmp (mangled, "_ada_", 5) == 0) - { - mangled += 5; - changed = 1; - } - - if (mangled[0] == '_' || mangled[0] == '<') - goto Suppress; - - p = strstr (mangled, "___"); - if (p == NULL) - len0 = strlen (mangled); - else - { - if (p[3] == 'X') - { - len0 = p - mangled; - changed = 1; - } - else - goto Suppress; - } - - /* Make demangled big enough for possible expansion by operator name. */ - grow_vect ((void **) &(demangling_buffer), - &demangling_buffer_size, 2 * len0 + 1, - sizeof (char)); - demangled = demangling_buffer; - - if (ISDIGIT ((unsigned char) mangled[len0 - 1])) { - for (i = len0 - 2; i >= 0 && ISDIGIT ((unsigned char) mangled[i]); i -= 1) - ; - if (i > 1 && mangled[i] == '_' && mangled[i - 1] == '_') - { - len0 = i - 1; - changed = 1; - } - else if (mangled[i] == '$') - { - len0 = i; - changed = 1; - } - } - - for (i = 0, j = 0; i < len0 && ! ISALPHA ((unsigned char)mangled[i]); - i += 1, j += 1) - demangled[j] = mangled[i]; - - at_start_name = 1; - while (i < len0) - { - at_start_name = 0; - - if (i < len0 - 2 && mangled[i] == '_' && mangled[i + 1] == '_') - { - demangled[j] = '.'; - changed = at_start_name = 1; - i += 2; j += 1; - } - else - { - demangled[j] = mangled[i]; - i += 1; j += 1; - } - } - demangled[j] = '\000'; - - for (i = 0; demangled[i] != '\0'; i += 1) - if (ISUPPER ((unsigned char)demangled[i]) || demangled[i] == ' ') - goto Suppress; - - if (! 
changed) - return NULL; - else - return demangled; - - Suppress: - grow_vect ((void **) &(demangling_buffer), - &demangling_buffer_size, strlen (mangled) + 3, - sizeof (char)); - demangled = demangling_buffer; - if (mangled[0] == '<') - strcpy (demangled, mangled); - else - sprintf (demangled, "<%s>", mangled); - - return demangled; -} - -/* This function performs most of what cplus_demangle use to do, but - to be able to demangle a name with a B, K or n code, we need to - have a longer term memory of what types have been seen. The original - now intializes and cleans up the squangle code info, while internal - calls go directly to this routine to avoid resetting that info. */ - -static char * -internal_cplus_demangle (work, mangled) - struct work_stuff *work; - const char *mangled; -{ - - string decl; - int success = 0; - char *demangled = NULL; - int s1, s2, s3, s4; - s1 = work->constructor; - s2 = work->destructor; - s3 = work->static_type; - s4 = work->type_quals; - work->constructor = work->destructor = 0; - work->type_quals = TYPE_UNQUALIFIED; - work->dllimported = 0; - - if ((mangled != NULL) && (*mangled != '\0')) - { - string_init (&decl); - - /* First check to see if gnu style demangling is active and if the - string to be demangled contains a CPLUS_MARKER. If so, attempt to - recognize one of the gnu special forms rather than looking for a - standard prefix. In particular, don't worry about whether there - is a "__" string in the mangled string. Consider "_$_5__foo" for - example. 
*/ - - if ((AUTO_DEMANGLING || GNU_DEMANGLING)) - { - success = gnu_special (work, &mangled, &decl); - } - if (!success) - { - success = demangle_prefix (work, &mangled, &decl); - } - if (success && (*mangled != '\0')) - { - success = demangle_signature (work, &mangled, &decl); - } - if (work->constructor == 2) - { - string_prepend (&decl, "global constructors keyed to "); - work->constructor = 0; - } - else if (work->destructor == 2) - { - string_prepend (&decl, "global destructors keyed to "); - work->destructor = 0; - } - else if (work->dllimported == 1) - { - string_prepend (&decl, "import stub for "); - work->dllimported = 0; - } - demangled = mop_up (work, &decl, success); - } - work->constructor = s1; - work->destructor = s2; - work->static_type = s3; - work->type_quals = s4; - return demangled; -} - - -/* Clear out and squangling related storage */ -static void -squangle_mop_up (work) - struct work_stuff *work; -{ - /* clean up the B and K type mangling types. */ - forget_B_and_K_types (work); - if (work -> btypevec != NULL) - { - free ((char *) work -> btypevec); - } - if (work -> ktypevec != NULL) - { - free ((char *) work -> ktypevec); - } -} - - -/* Copy the work state and storage. */ - -static void -work_stuff_copy_to_from (to, from) - struct work_stuff *to; - struct work_stuff *from; -{ - int i; - - delete_work_stuff (to); - - /* Shallow-copy scalars. */ - memcpy (to, from, sizeof (*to)); - - /* Deep-copy dynamic storage. 
*/ - if (from->typevec_size) - to->typevec - = (char **) xmalloc (from->typevec_size * sizeof (to->typevec[0])); - - for (i = 0; i < from->ntypes; i++) - { - int len = strlen (from->typevec[i]) + 1; - - to->typevec[i] = xmalloc (len); - memcpy (to->typevec[i], from->typevec[i], len); - } - - if (from->ksize) - to->ktypevec - = (char **) xmalloc (from->ksize * sizeof (to->ktypevec[0])); - - for (i = 0; i < from->numk; i++) - { - int len = strlen (from->ktypevec[i]) + 1; - - to->ktypevec[i] = xmalloc (len); - memcpy (to->ktypevec[i], from->ktypevec[i], len); - } - - if (from->bsize) - to->btypevec - = (char **) xmalloc (from->bsize * sizeof (to->btypevec[0])); - - for (i = 0; i < from->numb; i++) - { - int len = strlen (from->btypevec[i]) + 1; - - to->btypevec[i] = xmalloc (len); - memcpy (to->btypevec[i], from->btypevec[i], len); - } - - if (from->ntmpl_args) - to->tmpl_argvec - = xmalloc (from->ntmpl_args * sizeof (to->tmpl_argvec[0])); - - for (i = 0; i < from->ntmpl_args; i++) - { - int len = strlen (from->tmpl_argvec[i]) + 1; - - to->tmpl_argvec[i] = xmalloc (len); - memcpy (to->tmpl_argvec[i], from->tmpl_argvec[i], len); - } - - if (from->previous_argument) - { - to->previous_argument = (string*) xmalloc (sizeof (string)); - string_init (to->previous_argument); - string_appends (to->previous_argument, from->previous_argument); - } -} - - -/* Delete dynamic stuff in work_stuff that is not to be re-used. */ - -static void -delete_non_B_K_work_stuff (work) - struct work_stuff *work; -{ - /* Discard the remembered types, if any. 
*/ - - forget_types (work); - if (work -> typevec != NULL) - { - free ((char *) work -> typevec); - work -> typevec = NULL; - work -> typevec_size = 0; - } - if (work->tmpl_argvec) - { - int i; - - for (i = 0; i < work->ntmpl_args; i++) - if (work->tmpl_argvec[i]) - free ((char*) work->tmpl_argvec[i]); - - free ((char*) work->tmpl_argvec); - work->tmpl_argvec = NULL; - } - if (work->previous_argument) - { - string_delete (work->previous_argument); - free ((char*) work->previous_argument); - work->previous_argument = NULL; - } -} - - -/* Delete all dynamic storage in work_stuff. */ -static void -delete_work_stuff (work) - struct work_stuff *work; -{ - delete_non_B_K_work_stuff (work); - squangle_mop_up (work); -} - - -/* Clear out any mangled storage */ - -static char * -mop_up (work, declp, success) - struct work_stuff *work; - string *declp; - int success; -{ - char *demangled = NULL; - - delete_non_B_K_work_stuff (work); - - /* If demangling was successful, ensure that the demangled string is null - terminated and return it. Otherwise, free the demangling decl. */ - - if (!success) - { - string_delete (declp); - } - else - { - string_appendn (declp, "", 1); - demangled = declp->b; - } - return (demangled); -} - -/* - -LOCAL FUNCTION - - demangle_signature -- demangle the signature part of a mangled name - -SYNOPSIS - - static int - demangle_signature (struct work_stuff *work, const char **mangled, - string *declp); - -DESCRIPTION - - Consume and demangle the signature portion of the mangled name. - - DECLP is the string where demangled output is being built. At - entry it contains the demangled root name from the mangled name - prefix. I.E. either a demangled operator name or the root function - name. In some special cases, it may contain nothing. - - *MANGLED points to the current unconsumed location in the mangled - name. As tokens are consumed and demangling is performed, the - pointer is updated to continuously point at the next token to - be consumed. 
- - Demangling GNU style mangled names is nasty because there is no - explicit token that marks the start of the outermost function - argument list. */ - -static int -demangle_signature (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int success = 1; - int func_done = 0; - int expect_func = 0; - int expect_return_type = 0; - const char *oldmangled = NULL; - string trawname; - string tname; - - while (success && (**mangled != '\0')) - { - switch (**mangled) - { - case 'Q': - oldmangled = *mangled; - success = demangle_qualified (work, mangled, declp, 1, 0); - if (success) - remember_type (work, oldmangled, *mangled - oldmangled); - if (AUTO_DEMANGLING || GNU_DEMANGLING) - expect_func = 1; - oldmangled = NULL; - break; - - case 'K': - oldmangled = *mangled; - success = demangle_qualified (work, mangled, declp, 1, 0); - if (AUTO_DEMANGLING || GNU_DEMANGLING) - { - expect_func = 1; - } - oldmangled = NULL; - break; - - case 'S': - /* Static member function */ - if (oldmangled == NULL) - { - oldmangled = *mangled; - } - (*mangled)++; - work -> static_type = 1; - break; - - case 'C': - case 'V': - case 'u': - work->type_quals |= code_for_qualifier (**mangled); - - /* a qualified member function */ - if (oldmangled == NULL) - oldmangled = *mangled; - (*mangled)++; - break; - - case 'L': - /* Local class name follows after "Lnnn_" */ - if (HP_DEMANGLING) - { - while (**mangled && (**mangled != '_')) - (*mangled)++; - if (!**mangled) - success = 0; - else - (*mangled)++; - } - else - success = 0; - break; - - case '0': case '1': case '2': case '3': case '4': - case '5': case '6': case '7': case '8': case '9': - if (oldmangled == NULL) - { - oldmangled = *mangled; - } - work->temp_start = -1; /* uppermost call to demangle_class */ - success = demangle_class (work, mangled, declp); - if (success) - { - remember_type (work, oldmangled, *mangled - oldmangled); - } - if (AUTO_DEMANGLING || GNU_DEMANGLING || EDG_DEMANGLING) - { - /* 
EDG and others will have the "F", so we let the loop cycle - if we are looking at one. */ - if (**mangled != 'F') - expect_func = 1; - } - oldmangled = NULL; - break; - - case 'B': - { - string s; - success = do_type (work, mangled, &s); - if (success) - { - string_append (&s, SCOPE_STRING (work)); - string_prepends (declp, &s); - } - oldmangled = NULL; - expect_func = 1; - } - break; - - case 'F': - /* Function */ - /* ARM/HP style demangling includes a specific 'F' character after - the class name. For GNU style, it is just implied. So we can - safely just consume any 'F' at this point and be compatible - with either style. */ - - oldmangled = NULL; - func_done = 1; - (*mangled)++; - - /* For lucid/ARM/HP style we have to forget any types we might - have remembered up to this point, since they were not argument - types. GNU style considers all types seen as available for - back references. See comment in demangle_args() */ - - if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) - { - forget_types (work); - } - success = demangle_args (work, mangled, declp); - /* After picking off the function args, we expect to either - find the function return type (preceded by an '_') or the - end of the string. */ - if (success && (AUTO_DEMANGLING || EDG_DEMANGLING) && **mangled == '_') - { - ++(*mangled); - /* At this level, we do not care about the return type. 
*/ - success = do_type (work, mangled, &tname); - string_delete (&tname); - } - - break; - - case 't': - /* G++ Template */ - string_init(&trawname); - string_init(&tname); - if (oldmangled == NULL) - { - oldmangled = *mangled; - } - success = demangle_template (work, mangled, &tname, - &trawname, 1, 1); - if (success) - { - remember_type (work, oldmangled, *mangled - oldmangled); - } - string_append (&tname, SCOPE_STRING (work)); - - string_prepends(declp, &tname); - if (work -> destructor & 1) - { - string_prepend (&trawname, "~"); - string_appends (declp, &trawname); - work->destructor -= 1; - } - if ((work->constructor & 1) || (work->destructor & 1)) - { - string_appends (declp, &trawname); - work->constructor -= 1; - } - string_delete(&trawname); - string_delete(&tname); - oldmangled = NULL; - expect_func = 1; - break; - - case '_': - if ((AUTO_DEMANGLING || GNU_DEMANGLING) && expect_return_type) - { - /* Read the return type. */ - string return_type; - string_init (&return_type); - - (*mangled)++; - success = do_type (work, mangled, &return_type); - APPEND_BLANK (&return_type); - - string_prepends (declp, &return_type); - string_delete (&return_type); - break; - } - else - /* At the outermost level, we cannot have a return type specified, - so if we run into another '_' at this point we are dealing with - a mangled name that is either bogus, or has been mangled by - some algorithm we don't know how to deal with. So just - reject the entire demangling. */ - /* However, "_nnn" is an expected suffix for alternate entry point - numbered nnn for a function, with HP aCC, so skip over that - without reporting failure. pai/1997-09-04 */ - if (HP_DEMANGLING) - { - (*mangled)++; - while (**mangled && ISDIGIT ((unsigned char)**mangled)) - (*mangled)++; - } - else - success = 0; - break; - - case 'H': - if (AUTO_DEMANGLING || GNU_DEMANGLING) - { - /* A G++ template function. Read the template arguments. 
*/ - success = demangle_template (work, mangled, declp, 0, 0, - 0); - if (!(work->constructor & 1)) - expect_return_type = 1; - (*mangled)++; - break; - } - else - /* fall through */ - {;} - - default: - if (AUTO_DEMANGLING || GNU_DEMANGLING) - { - /* Assume we have stumbled onto the first outermost function - argument token, and start processing args. */ - func_done = 1; - success = demangle_args (work, mangled, declp); - } - else - { - /* Non-GNU demanglers use a specific token to mark the start - of the outermost function argument tokens. Typically 'F', - for ARM/HP-demangling, for example. So if we find something - we are not prepared for, it must be an error. */ - success = 0; - } - break; - } - /* - if (AUTO_DEMANGLING || GNU_DEMANGLING) - */ - { - if (success && expect_func) - { - func_done = 1; - if (LUCID_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING) - { - forget_types (work); - } - success = demangle_args (work, mangled, declp); - /* Since template include the mangling of their return types, - we must set expect_func to 0 so that we don't try do - demangle more arguments the next time we get here. */ - expect_func = 0; - } - } - } - if (success && !func_done) - { - if (AUTO_DEMANGLING || GNU_DEMANGLING) - { - /* With GNU style demangling, bar__3foo is 'foo::bar(void)', and - bar__3fooi is 'foo::bar(int)'. We get here when we find the - first case, and need to ensure that the '(void)' gets added to - the current declp. Note that with ARM/HP, the first case - represents the name of a static data member 'foo::bar', - which is in the current declp, so we leave it alone. 
*/ - success = demangle_args (work, mangled, declp); - } - } - if (success && PRINT_ARG_TYPES) - { - if (work->static_type) - string_append (declp, " static"); - if (work->type_quals != TYPE_UNQUALIFIED) - { - APPEND_BLANK (declp); - string_append (declp, qualifier_string (work->type_quals)); - } - } - - return (success); -} - -#if 0 - -static int -demangle_method_args (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int success = 0; - - if (work -> static_type) - { - string_append (declp, *mangled + 1); - *mangled += strlen (*mangled); - success = 1; - } - else - { - success = demangle_args (work, mangled, declp); - } - return (success); -} - -#endif - -static int -demangle_template_template_parm (work, mangled, tname) - struct work_stuff *work; - const char **mangled; - string *tname; -{ - int i; - int r; - int need_comma = 0; - int success = 1; - string temp; - - string_append (tname, "template <"); - /* get size of template parameter list */ - if (get_count (mangled, &r)) - { - for (i = 0; i < r; i++) - { - if (need_comma) - { - string_append (tname, ", "); - } - - /* Z for type parameters */ - if (**mangled == 'Z') - { - (*mangled)++; - string_append (tname, "class"); - } - /* z for template parameters */ - else if (**mangled == 'z') - { - (*mangled)++; - success = - demangle_template_template_parm (work, mangled, tname); - if (!success) - { - break; - } - } - else - { - /* temp is initialized in do_type */ - success = do_type (work, mangled, &temp); - if (success) - { - string_appends (tname, &temp); - } - string_delete(&temp); - if (!success) - { - break; - } - } - need_comma = 1; - } - - } - if (tname->p[-1] == '>') - string_append (tname, " "); - string_append (tname, "> class"); - return (success); -} - -static int -demangle_expression (work, mangled, s, tk) - struct work_stuff *work; - const char** mangled; - string* s; - type_kind_t tk; -{ - int need_operator = 0; - int success; - - success = 1; - 
string_appendn (s, "(", 1); - (*mangled)++; - while (success && **mangled != 'W' && **mangled != '\0') - { - if (need_operator) - { - size_t i; - size_t len; - - success = 0; - - len = strlen (*mangled); - - for (i = 0; i < ARRAY_SIZE (optable); ++i) - { - size_t l = strlen (optable[i].in); - - if (l <= len - && memcmp (optable[i].in, *mangled, l) == 0) - { - string_appendn (s, " ", 1); - string_append (s, optable[i].out); - string_appendn (s, " ", 1); - success = 1; - (*mangled) += l; - break; - } - } - - if (!success) - break; - } - else - need_operator = 1; - - success = demangle_template_value_parm (work, mangled, s, tk); - } - - if (**mangled != 'W') - success = 0; - else - { - string_appendn (s, ")", 1); - (*mangled)++; - } - - return success; -} - -static int -demangle_integral_value (work, mangled, s) - struct work_stuff *work; - const char** mangled; - string* s; -{ - int success; - - if (**mangled == 'E') - success = demangle_expression (work, mangled, s, tk_integral); - else if (**mangled == 'Q' || **mangled == 'K') - success = demangle_qualified (work, mangled, s, 0, 1); - else - { - int value; - - /* By default, we let the number decide whether we shall consume an - underscore. */ - int consume_following_underscore = 0; - int leave_following_underscore = 0; - - success = 0; - - /* Negative numbers are indicated with a leading `m'. */ - if (**mangled == 'm') - { - string_appendn (s, "-", 1); - (*mangled)++; - } - else if (mangled[0][0] == '_' && mangled[0][1] == 'm') - { - /* Since consume_count_with_underscores does not handle the - `m'-prefix we must do it here, using consume_count and - adjusting underscores: we have to consume the underscore - matching the prepended one. */ - consume_following_underscore = 1; - string_appendn (s, "-", 1); - (*mangled) += 2; - } - else if (**mangled == '_') - { - /* Do not consume a following underscore; - consume_following_underscore will consume what should be - consumed. 
*/ - leave_following_underscore = 1; - } - - /* We must call consume_count if we expect to remove a trailing - underscore, since consume_count_with_underscores expects - the leading underscore (that we consumed) if it is to handle - multi-digit numbers. */ - if (consume_following_underscore) - value = consume_count (mangled); - else - value = consume_count_with_underscores (mangled); - - if (value != -1) - { - char buf[INTBUF_SIZE]; - sprintf (buf, "%d", value); - string_append (s, buf); - - /* Numbers not otherwise delimited, might have an underscore - appended as a delimeter, which we should skip. - - ??? This used to always remove a following underscore, which - is wrong. If other (arbitrary) cases are followed by an - underscore, we need to do something more radical. */ - - if ((value > 9 || consume_following_underscore) - && ! leave_following_underscore - && **mangled == '_') - (*mangled)++; - - /* All is well. */ - success = 1; - } - } - - return success; -} - -/* Demangle the real value in MANGLED. 
*/ - -static int -demangle_real_value (work, mangled, s) - struct work_stuff *work; - const char **mangled; - string* s; -{ - if (**mangled == 'E') - return demangle_expression (work, mangled, s, tk_real); - - if (**mangled == 'm') - { - string_appendn (s, "-", 1); - (*mangled)++; - } - while (ISDIGIT ((unsigned char)**mangled)) - { - string_appendn (s, *mangled, 1); - (*mangled)++; - } - if (**mangled == '.') /* fraction */ - { - string_appendn (s, ".", 1); - (*mangled)++; - while (ISDIGIT ((unsigned char)**mangled)) - { - string_appendn (s, *mangled, 1); - (*mangled)++; - } - } - if (**mangled == 'e') /* exponent */ - { - string_appendn (s, "e", 1); - (*mangled)++; - while (ISDIGIT ((unsigned char)**mangled)) - { - string_appendn (s, *mangled, 1); - (*mangled)++; - } - } - - return 1; -} - -static int -demangle_template_value_parm (work, mangled, s, tk) - struct work_stuff *work; - const char **mangled; - string* s; - type_kind_t tk; -{ - int success = 1; - - if (**mangled == 'Y') - { - /* The next argument is a template parameter. 
*/ - int idx; - - (*mangled)++; - idx = consume_count_with_underscores (mangled); - if (idx == -1 - || (work->tmpl_argvec && idx >= work->ntmpl_args) - || consume_count_with_underscores (mangled) == -1) - return -1; - if (work->tmpl_argvec) - string_append (s, work->tmpl_argvec[idx]); - else - string_append_template_idx (s, idx); - } - else if (tk == tk_integral) - success = demangle_integral_value (work, mangled, s); - else if (tk == tk_char) - { - char tmp[2]; - int val; - if (**mangled == 'm') - { - string_appendn (s, "-", 1); - (*mangled)++; - } - string_appendn (s, "'", 1); - val = consume_count(mangled); - if (val <= 0) - success = 0; - else - { - tmp[0] = (char)val; - tmp[1] = '\0'; - string_appendn (s, &tmp[0], 1); - string_appendn (s, "'", 1); - } - } - else if (tk == tk_bool) - { - int val = consume_count (mangled); - if (val == 0) - string_appendn (s, "false", 5); - else if (val == 1) - string_appendn (s, "true", 4); - else - success = 0; - } - else if (tk == tk_real) - success = demangle_real_value (work, mangled, s); - else if (tk == tk_pointer || tk == tk_reference) - { - if (**mangled == 'Q') - success = demangle_qualified (work, mangled, s, - /*isfuncname=*/0, - /*append=*/1); - else - { - int symbol_len = consume_count (mangled); - if (symbol_len == -1) - return -1; - if (symbol_len == 0) - string_appendn (s, "0", 1); - else - { - char *p = xmalloc (symbol_len + 1), *q; - strncpy (p, *mangled, symbol_len); - p [symbol_len] = '\0'; - /* We use cplus_demangle here, rather than - internal_cplus_demangle, because the name of the entity - mangled here does not make use of any of the squangling - or type-code information we have built up thus far; it is - mangled independently. */ - q = VG_(cplus_demangle) (p, work->options); - if (tk == tk_pointer) - string_appendn (s, "&", 1); - /* FIXME: Pointer-to-member constants should get a - qualifying class name here. 
*/ - if (q) - { - string_append (s, q); - free (q); - } - else - string_append (s, p); - free (p); - } - *mangled += symbol_len; - } - } - - return success; -} - -/* Demangle the template name in MANGLED. The full name of the - template (e.g., S) is placed in TNAME. The name without the - template parameters (e.g. S) is placed in TRAWNAME if TRAWNAME is - non-NULL. If IS_TYPE is nonzero, this template is a type template, - not a function template. If both IS_TYPE and REMEMBER are nonzero, - the template is remembered in the list of back-referenceable - types. */ - -static int -demangle_template (work, mangled, tname, trawname, is_type, remember) - struct work_stuff *work; - const char **mangled; - string *tname; - string *trawname; - int is_type; - int remember; -{ - int i; - int r; - int need_comma = 0; - int success = 0; - const char *start; - int is_java_array = 0; - string temp; - int bindex = 0; - - (*mangled)++; - if (is_type) - { - if (remember) - bindex = register_Btype (work); - start = *mangled; - /* get template name */ - if (**mangled == 'z') - { - int idx; - (*mangled)++; - (*mangled)++; - - idx = consume_count_with_underscores (mangled); - if (idx == -1 - || (work->tmpl_argvec && idx >= work->ntmpl_args) - || consume_count_with_underscores (mangled) == -1) - return (0); - - if (work->tmpl_argvec) - { - string_append (tname, work->tmpl_argvec[idx]); - if (trawname) - string_append (trawname, work->tmpl_argvec[idx]); - } - else - { - string_append_template_idx (tname, idx); - if (trawname) - string_append_template_idx (trawname, idx); - } - } - else - { - if ((r = consume_count (mangled)) <= 0 - || (int) strlen (*mangled) < r) - { - return (0); - } - is_java_array = (work -> options & DMGL_JAVA) - && strncmp (*mangled, "JArray1Z", 8) == 0; - if (! 
is_java_array) - { - string_appendn (tname, *mangled, r); - } - if (trawname) - string_appendn (trawname, *mangled, r); - *mangled += r; - } - } - if (!is_java_array) - string_append (tname, "<"); - /* get size of template parameter list */ - if (!get_count (mangled, &r)) - { - return (0); - } - if (!is_type) - { - /* Create an array for saving the template argument values. */ - work->tmpl_argvec = (char**) xmalloc (r * sizeof (char *)); - work->ntmpl_args = r; - for (i = 0; i < r; i++) - work->tmpl_argvec[i] = 0; - } - for (i = 0; i < r; i++) - { - if (need_comma) - { - string_append (tname, ", "); - } - /* Z for type parameters */ - if (**mangled == 'Z') - { - (*mangled)++; - /* temp is initialized in do_type */ - success = do_type (work, mangled, &temp); - if (success) - { - string_appends (tname, &temp); - - if (!is_type) - { - /* Save the template argument. */ - int len = temp.p - temp.b; - work->tmpl_argvec[i] = xmalloc (len + 1); - memcpy (work->tmpl_argvec[i], temp.b, len); - work->tmpl_argvec[i][len] = '\0'; - } - } - string_delete(&temp); - if (!success) - { - break; - } - } - /* z for template parameters */ - else if (**mangled == 'z') - { - int r2; - (*mangled)++; - success = demangle_template_template_parm (work, mangled, tname); - - if (success - && (r2 = consume_count (mangled)) > 0 - && (int) strlen (*mangled) >= r2) - { - string_append (tname, " "); - string_appendn (tname, *mangled, r2); - if (!is_type) - { - /* Save the template argument. 
*/ - int len = r2; - work->tmpl_argvec[i] = xmalloc (len + 1); - memcpy (work->tmpl_argvec[i], *mangled, len); - work->tmpl_argvec[i][len] = '\0'; - } - *mangled += r2; - } - if (!success) - { - break; - } - } - else - { - string param; - string* s; - - /* otherwise, value parameter */ - - /* temp is initialized in do_type */ - success = do_type (work, mangled, &temp); - string_delete(&temp); - if (!success) - break; - - if (!is_type) - { - s = ¶m; - string_init (s); - } - else - s = tname; - - success = demangle_template_value_parm (work, mangled, s, - (type_kind_t) success); - - if (!success) - { - if (!is_type) - string_delete (s); - success = 0; - break; - } - - if (!is_type) - { - int len = s->p - s->b; - work->tmpl_argvec[i] = xmalloc (len + 1); - memcpy (work->tmpl_argvec[i], s->b, len); - work->tmpl_argvec[i][len] = '\0'; - - string_appends (tname, s); - string_delete (s); - } - } - need_comma = 1; - } - if (is_java_array) - { - string_append (tname, "[]"); - } - else - { - if (tname->p[-1] == '>') - string_append (tname, " "); - string_append (tname, ">"); - } - - if (is_type && remember) - remember_Btype (work, tname->b, LEN_STRING (tname), bindex); - - /* - if (work -> static_type) - { - string_append (declp, *mangled + 1); - *mangled += strlen (*mangled); - success = 1; - } - else - { - success = demangle_args (work, mangled, declp); - } - } - */ - return (success); -} - -static int -arm_pt (work, mangled, n, anchor, args) - struct work_stuff *work; - const char *mangled; - int n; - const char **anchor, **args; -{ - /* Check if ARM template with "__pt__" in it ("parameterized type") */ - /* Allow HP also here, because HP's cfront compiler follows ARM to some extent */ - if ((ARM_DEMANGLING || HP_DEMANGLING) && (*anchor = strstr (mangled, "__pt__"))) - { - int len; - *args = *anchor + 6; - len = consume_count (args); - if (len == -1) - return 0; - if (*args + len == mangled + n && **args == '_') - { - ++*args; - return 1; - } - } - if (AUTO_DEMANGLING || 
EDG_DEMANGLING) - { - if ((*anchor = strstr (mangled, "__tm__")) - || (*anchor = strstr (mangled, "__ps__")) - || (*anchor = strstr (mangled, "__pt__"))) - { - int len; - *args = *anchor + 6; - len = consume_count (args); - if (len == -1) - return 0; - if (*args + len == mangled + n && **args == '_') - { - ++*args; - return 1; - } - } - else if ((*anchor = strstr (mangled, "__S"))) - { - int len; - *args = *anchor + 3; - len = consume_count (args); - if (len == -1) - return 0; - if (*args + len == mangled + n && **args == '_') - { - ++*args; - return 1; - } - } - } - - return 0; -} - -static void -demangle_arm_hp_template (work, mangled, n, declp) - struct work_stuff *work; - const char **mangled; - int n; - string *declp; -{ - const char *p; - const char *args; - const char *e = *mangled + n; - string arg; - - /* Check for HP aCC template spec: classXt1t2 where t1, t2 are - template args */ - if (HP_DEMANGLING && ((*mangled)[n] == 'X')) - { - char *start_spec_args = NULL; - - /* First check for and omit template specialization pseudo-arguments, - such as in "Spec<#1,#1.*>" */ - start_spec_args = strchr (*mangled, '<'); - if (start_spec_args && (start_spec_args - *mangled < n)) - string_appendn (declp, *mangled, start_spec_args - *mangled); - else - string_appendn (declp, *mangled, n); - (*mangled) += n + 1; - string_init (&arg); - if (work->temp_start == -1) /* non-recursive call */ - work->temp_start = declp->p - declp->b; - string_append (declp, "<"); - while (1) - { - string_clear (&arg); - switch (**mangled) - { - case 'T': - /* 'T' signals a type parameter */ - (*mangled)++; - if (!do_type (work, mangled, &arg)) - goto hpacc_template_args_done; - break; - - case 'U': - case 'S': - /* 'U' or 'S' signals an integral value */ - if (!do_hpacc_template_const_value (work, mangled, &arg)) - goto hpacc_template_args_done; - break; - - case 'A': - /* 'A' signals a named constant expression (literal) */ - if (!do_hpacc_template_literal (work, mangled, &arg)) - goto 
hpacc_template_args_done; - break; - - default: - /* Today, 1997-09-03, we have only the above types - of template parameters */ - /* FIXME: maybe this should fail and return null */ - goto hpacc_template_args_done; - } - string_appends (declp, &arg); - /* Check if we're at the end of template args. - 0 if at end of static member of template class, - _ if done with template args for a function */ - if ((**mangled == '\000') || (**mangled == '_')) - break; - else - string_append (declp, ","); - } - hpacc_template_args_done: - string_append (declp, ">"); - string_delete (&arg); - if (**mangled == '_') - (*mangled)++; - return; - } - /* ARM template? (Also handles HP cfront extensions) */ - else if (arm_pt (work, *mangled, n, &p, &args)) - { - string type_str; - - string_init (&arg); - string_appendn (declp, *mangled, p - *mangled); - if (work->temp_start == -1) /* non-recursive call */ - work->temp_start = declp->p - declp->b; - string_append (declp, "<"); - /* should do error checking here */ - while (args < e) { - string_clear (&arg); - - /* Check for type or literal here */ - switch (*args) - { - /* HP cfront extensions to ARM for template args */ - /* spec: Xt1Lv1 where t1 is a type, v1 is a literal value */ - /* FIXME: We handle only numeric literals for HP cfront */ - case 'X': - /* A typed constant value follows */ - args++; - if (!do_type (work, &args, &type_str)) - goto cfront_template_args_done; - string_append (&arg, "("); - string_appends (&arg, &type_str); - string_append (&arg, ")"); - if (*args != 'L') - goto cfront_template_args_done; - args++; - /* Now snarf a literal value following 'L' */ - if (!snarf_numeric_literal (&args, &arg)) - goto cfront_template_args_done; - break; - - case 'L': - /* Snarf a literal following 'L' */ - args++; - if (!snarf_numeric_literal (&args, &arg)) - goto cfront_template_args_done; - break; - default: - /* Not handling other HP cfront stuff */ - if (!do_type (work, &args, &arg)) - goto cfront_template_args_done; - } - 
string_appends (declp, &arg); - string_append (declp, ","); - } - cfront_template_args_done: - string_delete (&arg); - if (args >= e) - --declp->p; /* remove extra comma */ - string_append (declp, ">"); - } - else if (n>10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 - && (*mangled)[9] == 'N' - && (*mangled)[8] == (*mangled)[10] - && strchr (cplus_markers, (*mangled)[8])) - { - /* A member of the anonymous namespace. */ - string_append (declp, "{anonymous}"); - } - else - { - if (work->temp_start == -1) /* non-recursive call only */ - work->temp_start = 0; /* disable in recursive calls */ - string_appendn (declp, *mangled, n); - } - *mangled += n; -} - -/* Extract a class name, possibly a template with arguments, from the - mangled string; qualifiers, local class indicators, etc. have - already been dealt with */ - -static int -demangle_class_name (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int n; - int success = 0; - - n = consume_count (mangled); - if (n == -1) - return 0; - if ((int) strlen (*mangled) >= n) - { - demangle_arm_hp_template (work, mangled, n, declp); - success = 1; - } - - return (success); -} - -/* - -LOCAL FUNCTION - - demangle_class -- demangle a mangled class sequence - -SYNOPSIS - - static int - demangle_class (struct work_stuff *work, const char **mangled, - strint *declp) - -DESCRIPTION - - DECLP points to the buffer into which demangling is being done. - - *MANGLED points to the current token to be demangled. On input, - it points to a mangled class (I.E. "3foo", "13verylongclass", etc.) - On exit, it points to the next token after the mangled class on - success, or the first unconsumed token on failure. - - If the CONSTRUCTOR or DESTRUCTOR flags are set in WORK, then - we are demangling a constructor or destructor. In this case - we prepend "class::class" or "class::~class" to DECLP. - - Otherwise, we prepend "class::" to the current DECLP. 
- - Reset the constructor/destructor flags once they have been - "consumed". This allows demangle_class to be called later during - the same demangling, to do normal class demangling. - - Returns 1 if demangling is successful, 0 otherwise. - -*/ - -static int -demangle_class (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int success = 0; - int btype; - string class_name; - char *save_class_name_end = 0; - - string_init (&class_name); - btype = register_Btype (work); - if (demangle_class_name (work, mangled, &class_name)) - { - save_class_name_end = class_name.p; - if ((work->constructor & 1) || (work->destructor & 1)) - { - /* adjust so we don't include template args */ - if (work->temp_start && (work->temp_start != -1)) - { - class_name.p = class_name.b + work->temp_start; - } - string_prepends (declp, &class_name); - if (work -> destructor & 1) - { - string_prepend (declp, "~"); - work -> destructor -= 1; - } - else - { - work -> constructor -= 1; - } - } - class_name.p = save_class_name_end; - remember_Ktype (work, class_name.b, LEN_STRING(&class_name)); - remember_Btype (work, class_name.b, LEN_STRING(&class_name), btype); - string_prepend (declp, SCOPE_STRING (work)); - string_prepends (declp, &class_name); - success = 1; - } - string_delete (&class_name); - return (success); -} - - -/* Called when there's a "__" in the mangled name, with `scan' pointing to - the rightmost guess. - - Find the correct "__"-sequence where the function name ends and the - signature starts, which is ambiguous with GNU mangling. - Call demangle_signature here, so we can make sure we found the right - one; *mangled will be consumed so caller will not make further calls to - demangle_signature. 
*/ - -static int -iterate_demangle_function (work, mangled, declp, scan) - struct work_stuff *work; - const char **mangled; - string *declp; - const char *scan; -{ - const char *mangle_init = *mangled; - int success = 0; - string decl_init; - struct work_stuff work_init; - - if (*(scan + 2) == '\0') - return 0; - - /* Do not iterate for some demangling modes, or if there's only one - "__"-sequence. This is the normal case. */ - if (ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING - || strstr (scan + 2, "__") == NULL) - { - demangle_function_name (work, mangled, declp, scan); - return 1; - } - - /* Save state so we can restart if the guess at the correct "__" was - wrong. */ - string_init (&decl_init); - string_appends (&decl_init, declp); - memset (&work_init, 0, sizeof work_init); - work_stuff_copy_to_from (&work_init, work); - - /* Iterate over occurrences of __, allowing names and types to have a - "__" sequence in them. We must start with the first (not the last) - occurrence, since "__" most often occur between independent mangled - parts, hence starting at the last occurence inside a signature - might get us a "successful" demangling of the signature. */ - - while (scan[2]) - { - demangle_function_name (work, mangled, declp, scan); - success = demangle_signature (work, mangled, declp); - if (success) - break; - - /* Reset demangle state for the next round. */ - *mangled = mangle_init; - string_clear (declp); - string_appends (declp, &decl_init); - work_stuff_copy_to_from (work, &work_init); - - /* Leave this underscore-sequence. */ - scan += 2; - - /* Scan for the next "__" sequence. */ - while (*scan && (scan[0] != '_' || scan[1] != '_')) - scan++; - - /* Move to last "__" in this sequence. */ - while (*scan && *scan == '_') - scan++; - scan -= 2; - } - - /* Delete saved state. 
*/ - delete_work_stuff (&work_init); - string_delete (&decl_init); - - return success; -} - -/* - -LOCAL FUNCTION - - demangle_prefix -- consume the mangled name prefix and find signature - -SYNOPSIS - - static int - demangle_prefix (struct work_stuff *work, const char **mangled, - string *declp); - -DESCRIPTION - - Consume and demangle the prefix of the mangled name. - While processing the function name root, arrange to call - demangle_signature if the root is ambiguous. - - DECLP points to the string buffer into which demangled output is - placed. On entry, the buffer is empty. On exit it contains - the root function name, the demangled operator name, or in some - special cases either nothing or the completely demangled result. - - MANGLED points to the current pointer into the mangled name. As each - token of the mangled name is consumed, it is updated. Upon entry - the current mangled name pointer points to the first character of - the mangled name. Upon exit, it should point to the first character - of the signature if demangling was successful, or to the first - unconsumed character if demangling of the prefix was unsuccessful. - - Returns 1 on success, 0 otherwise. - */ - -static int -demangle_prefix (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int success = 1; - const char *scan; - int i; - - if (strlen(*mangled) > 6 - && (strncmp(*mangled, "_imp__", 6) == 0 - || strncmp(*mangled, "__imp_", 6) == 0)) - { - /* it's a symbol imported from a PE dynamic library. Check for both - new style prefix _imp__ and legacy __imp_ used by older versions - of dlltool. 
*/ - (*mangled) += 6; - work->dllimported = 1; - } - else if (strlen(*mangled) >= 11 && strncmp(*mangled, "_GLOBAL_", 8) == 0) - { - char *marker = strchr (cplus_markers, (*mangled)[8]); - if (marker != NULL && *marker == (*mangled)[10]) - { - if ((*mangled)[9] == 'D') - { - /* it's a GNU global destructor to be executed at program exit */ - (*mangled) += 11; - work->destructor = 2; - if (gnu_special (work, mangled, declp)) - return success; - } - else if ((*mangled)[9] == 'I') - { - /* it's a GNU global constructor to be executed at program init */ - (*mangled) += 11; - work->constructor = 2; - if (gnu_special (work, mangled, declp)) - return success; - } - } - } - else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__std__", 7) == 0) - { - /* it's a ARM global destructor to be executed at program exit */ - (*mangled) += 7; - work->destructor = 2; - } - else if ((ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) && strncmp(*mangled, "__sti__", 7) == 0) - { - /* it's a ARM global constructor to be executed at program initial */ - (*mangled) += 7; - work->constructor = 2; - } - - /* This block of code is a reduction in strength time optimization - of: - scan = strstr (*mangled, "__"); */ - - { - scan = *mangled; - - do { - scan = strchr (scan, '_'); - } while (scan != NULL && *++scan != '_'); - - if (scan != NULL) --scan; - } - - if (scan != NULL) - { - /* We found a sequence of two or more '_', ensure that we start at - the last pair in the sequence. 
*/ - /* i = strspn (scan, "_"); */ - i = 0; - while (scan[i] == '_') i++; - if (i > 2) - { - scan += (i - 2); - } - } - - if (scan == NULL) - { - success = 0; - } - else if (work -> static_type) - { - if (!ISDIGIT ((unsigned char)scan[0]) && (scan[0] != 't')) - { - success = 0; - } - } - else if ((scan == *mangled) - && (ISDIGIT ((unsigned char)scan[2]) || (scan[2] == 'Q') - || (scan[2] == 't') || (scan[2] == 'K') || (scan[2] == 'H'))) - { - /* The ARM says nothing about the mangling of local variables. - But cfront mangles local variables by prepending __ - to them. As an extension to ARM demangling we handle this case. */ - if ((LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING) - && ISDIGIT ((unsigned char)scan[2])) - { - *mangled = scan + 2; - consume_count (mangled); - string_append (declp, *mangled); - *mangled += strlen (*mangled); - success = 1; - } - else - { - /* A GNU style constructor starts with __[0-9Qt]. But cfront uses - names like __Q2_3foo3bar for nested type names. So don't accept - this style of constructor for cfront demangling. A GNU - style member-template constructor starts with 'H'. */ - if (!(LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING)) - work -> constructor += 1; - *mangled = scan + 2; - } - } - else if (ARM_DEMANGLING && scan[2] == 'p' && scan[3] == 't') - { - /* Cfront-style parameterized type. Handled later as a signature. */ - success = 1; - - /* ARM template? */ - demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); - } - else if (EDG_DEMANGLING && ((scan[2] == 't' && scan[3] == 'm') - || (scan[2] == 'p' && scan[3] == 's') - || (scan[2] == 'p' && scan[3] == 't'))) - { - /* EDG-style parameterized type. Handled later as a signature. */ - success = 1; - - /* EDG template? */ - demangle_arm_hp_template (work, mangled, strlen (*mangled), declp); - } - else if ((scan == *mangled) && !ISDIGIT ((unsigned char)scan[2]) - && (scan[2] != 't')) - { - /* Mangled name starts with "__". 
Skip over any leading '_' characters, - then find the next "__" that separates the prefix from the signature. - */ - if (!(ARM_DEMANGLING || LUCID_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) - || (arm_special (mangled, declp) == 0)) - { - while (*scan == '_') - { - scan++; - } - if ((scan = strstr (scan, "__")) == NULL || (*(scan + 2) == '\0')) - { - /* No separator (I.E. "__not_mangled"), or empty signature - (I.E. "__not_mangled_either__") */ - success = 0; - } - else - return iterate_demangle_function (work, mangled, declp, scan); - } - } - else if (*(scan + 2) != '\0') - { - /* Mangled name does not start with "__" but does have one somewhere - in there with non empty stuff after it. Looks like a global - function name. Iterate over all "__":s until the right - one is found. */ - return iterate_demangle_function (work, mangled, declp, scan); - } - else - { - /* Doesn't look like a mangled name */ - success = 0; - } - - if (!success && (work->constructor == 2 || work->destructor == 2)) - { - string_append (declp, *mangled); - *mangled += strlen (*mangled); - success = 1; - } - return (success); -} - -/* - -LOCAL FUNCTION - - gnu_special -- special handling of gnu mangled strings - -SYNOPSIS - - static int - gnu_special (struct work_stuff *work, const char **mangled, - string *declp); - - -DESCRIPTION - - Process some special GNU style mangling forms that don't fit - the normal pattern. 
For example: - - _$_3foo (destructor for class foo) - _vt$foo (foo virtual table) - _vt$foo$bar (foo::bar virtual table) - __vt_foo (foo virtual table, new style with thunks) - _3foo$varname (static data member) - _Q22rs2tu$vw (static data member) - __t6vector1Zii (constructor with template) - __thunk_4__$_7ostream (virtual function thunk) - */ - -static int -gnu_special (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - int n; - int success = 1; - const char *p; - - if ((*mangled)[0] == '_' - && strchr (cplus_markers, (*mangled)[1]) != NULL - && (*mangled)[2] == '_') - { - /* Found a GNU style destructor, get past "__" */ - (*mangled) += 3; - work -> destructor += 1; - } - else if ((*mangled)[0] == '_' - && (((*mangled)[1] == '_' - && (*mangled)[2] == 'v' - && (*mangled)[3] == 't' - && (*mangled)[4] == '_') - || ((*mangled)[1] == 'v' - && (*mangled)[2] == 't' - && strchr (cplus_markers, (*mangled)[3]) != NULL))) - { - /* Found a GNU style virtual table, get past "_vt" - and create the decl. Note that we consume the entire mangled - input string, which means that demangle_signature has no work - to do. */ - if ((*mangled)[2] == 'v') - (*mangled) += 5; /* New style, with thunks: "__vt_" */ - else - (*mangled) += 4; /* Old style, no thunks: "_vt" */ - while (**mangled != '\0') - { - switch (**mangled) - { - case 'Q': - case 'K': - success = demangle_qualified (work, mangled, declp, 0, 1); - break; - case 't': - success = demangle_template (work, mangled, declp, 0, 1, - 1); - break; - default: - if (ISDIGIT((unsigned char)*mangled[0])) - { - n = consume_count(mangled); - /* We may be seeing a too-large size, or else a - "." indicating a static local symbol. In - any case, declare victory and move on; *don't* try - to use n to allocate. 
*/ - if (n > (int) strlen (*mangled)) - { - success = 1; - break; - } - } - else - { - /*n = strcspn (*mangled, cplus_markers);*/ - const char *check = *mangled; - n = 0; - while (*check) - if (strchr (cplus_markers, *check++) == NULL) - n++; - else - break; - } - string_appendn (declp, *mangled, n); - (*mangled) += n; - } - - p = strpbrk (*mangled, cplus_markers); - if (success && ((p == NULL) || (p == *mangled))) - { - if (p != NULL) - { - string_append (declp, SCOPE_STRING (work)); - (*mangled)++; - } - } - else - { - success = 0; - break; - } - } - if (success) - string_append (declp, " virtual table"); - } - else if ((*mangled)[0] == '_' - && (strchr("0123456789Qt", (*mangled)[1]) != NULL) - && (p = strpbrk (*mangled, cplus_markers)) != NULL) - { - /* static data member, "_3foo$varname" for example */ - (*mangled)++; - switch (**mangled) - { - case 'Q': - case 'K': - success = demangle_qualified (work, mangled, declp, 0, 1); - break; - case 't': - success = demangle_template (work, mangled, declp, 0, 1, 1); - break; - default: - n = consume_count (mangled); - if (n < 0 || n > (long) strlen (*mangled)) - { - success = 0; - break; - } - - if (n > 10 && strncmp (*mangled, "_GLOBAL_", 8) == 0 - && (*mangled)[9] == 'N' - && (*mangled)[8] == (*mangled)[10] - && strchr (cplus_markers, (*mangled)[8])) - { - /* A member of the anonymous namespace. There's information - about what identifier or filename it was keyed to, but - it's just there to make the mangled name unique; we just - step over it. */ - string_append (declp, "{anonymous}"); - (*mangled) += n; - - /* Now p points to the marker before the N, so we need to - update it to the first marker after what we consumed. */ - p = strpbrk (*mangled, cplus_markers); - break; - } - - string_appendn (declp, *mangled, n); - (*mangled) += n; - } - if (success && (p == *mangled)) - { - /* Consumed everything up to the cplus_marker, append the - variable name. 
*/ - (*mangled)++; - string_append (declp, SCOPE_STRING (work)); - n = strlen (*mangled); - string_appendn (declp, *mangled, n); - (*mangled) += n; - } - else - { - success = 0; - } - } - else if (strncmp (*mangled, "__thunk_", 8) == 0) - { - int delta; - - (*mangled) += 8; - delta = consume_count (mangled); - if (delta == -1) - success = 0; - else - { - char *method = internal_cplus_demangle (work, ++*mangled); - - if (method) - { - char buf[50]; - sprintf (buf, "virtual function thunk (delta:%d) for ", -delta); - string_append (declp, buf); - string_append (declp, method); - free (method); - n = strlen (*mangled); - (*mangled) += n; - } - else - { - success = 0; - } - } - } - else if (strncmp (*mangled, "__t", 3) == 0 - && ((*mangled)[3] == 'i' || (*mangled)[3] == 'f')) - { - p = (*mangled)[3] == 'i' ? " type_info node" : " type_info function"; - (*mangled) += 4; - switch (**mangled) - { - case 'Q': - case 'K': - success = demangle_qualified (work, mangled, declp, 0, 1); - break; - case 't': - success = demangle_template (work, mangled, declp, 0, 1, 1); - break; - default: - success = do_type (work, mangled, declp); - break; - } - if (success && **mangled != '\0') - success = 0; - if (success) - string_append (declp, p); - } - else - { - success = 0; - } - return (success); -} - -static void -recursively_demangle(work, mangled, result, namelength) - struct work_stuff *work; - const char **mangled; - string *result; - int namelength; -{ - char * recurse = (char *)NULL; - char * recurse_dem = (char *)NULL; - - recurse = (char *) xmalloc (namelength + 1); - memcpy (recurse, *mangled, namelength); - recurse[namelength] = '\000'; - - recurse_dem = VG_(cplus_demangle) (recurse, work->options); - - if (recurse_dem) - { - string_append (result, recurse_dem); - free (recurse_dem); - } - else - { - string_appendn (result, *mangled, namelength); - } - free (recurse); - *mangled += namelength; -} - -/* - -LOCAL FUNCTION - - arm_special -- special handling of ARM/lucid 
mangled strings - -SYNOPSIS - - static int - arm_special (const char **mangled, - string *declp); - - -DESCRIPTION - - Process some special ARM style mangling forms that don't fit - the normal pattern. For example: - - __vtbl__3foo (foo virtual table) - __vtbl__3foo__3bar (bar::foo virtual table) - - */ - -static int -arm_special (mangled, declp) - const char **mangled; - string *declp; -{ - int n; - int success = 1; - const char *scan; - - if (strncmp (*mangled, ARM_VTABLE_STRING, ARM_VTABLE_STRLEN) == 0) - { - /* Found a ARM style virtual table, get past ARM_VTABLE_STRING - and create the decl. Note that we consume the entire mangled - input string, which means that demangle_signature has no work - to do. */ - scan = *mangled + ARM_VTABLE_STRLEN; - while (*scan != '\0') /* first check it can be demangled */ - { - n = consume_count (&scan); - if (n == -1) - { - return (0); /* no good */ - } - scan += n; - if (scan[0] == '_' && scan[1] == '_') - { - scan += 2; - } - } - (*mangled) += ARM_VTABLE_STRLEN; - while (**mangled != '\0') - { - n = consume_count (mangled); - if (n == -1 - || n > (long) strlen (*mangled)) - return 0; - string_prependn (declp, *mangled, n); - (*mangled) += n; - if ((*mangled)[0] == '_' && (*mangled)[1] == '_') - { - string_prepend (declp, "::"); - (*mangled) += 2; - } - } - string_append (declp, " virtual table"); - } - else - { - success = 0; - } - return (success); -} - -/* - -LOCAL FUNCTION - - demangle_qualified -- demangle 'Q' qualified name strings - -SYNOPSIS - - static int - demangle_qualified (struct work_stuff *, const char *mangled, - string *result, int isfuncname, int append); - -DESCRIPTION - - Demangle a qualified name, such as "Q25Outer5Inner" which is - the mangled form of "Outer::Inner". The demangled output is - prepended or appended to the result string according to the - state of the append flag. 
- - If isfuncname is nonzero, then the qualified name we are building - is going to be used as a member function name, so if it is a - constructor or destructor function, append an appropriate - constructor or destructor name. I.E. for the above example, - the result for use as a constructor is "Outer::Inner::Inner" - and the result for use as a destructor is "Outer::Inner::~Inner". - -BUGS - - Numeric conversion is ASCII dependent (FIXME). - - */ - -static int -demangle_qualified (work, mangled, result, isfuncname, append) - struct work_stuff *work; - const char **mangled; - string *result; - int isfuncname; - int append; -{ - int qualifiers = 0; - int success = 1; - string temp; - string last_name; - int bindex = register_Btype (work); - - /* We only make use of ISFUNCNAME if the entity is a constructor or - destructor. */ - isfuncname = (isfuncname - && ((work->constructor & 1) || (work->destructor & 1))); - - string_init (&temp); - string_init (&last_name); - - if ((*mangled)[0] == 'K') - { - /* Squangling qualified name reuse */ - int idx; - (*mangled)++; - idx = consume_count_with_underscores (mangled); - if (idx == -1 || idx >= work -> numk) - success = 0; - else - string_append (&temp, work -> ktypevec[idx]); - } - else - switch ((*mangled)[1]) - { - case '_': - /* GNU mangled name with more than 9 classes. The count is preceded - by an underscore (to distinguish it from the <= 9 case) and followed - by an underscore. */ - (*mangled)++; - qualifiers = consume_count_with_underscores (mangled); - if (qualifiers == -1) - success = 0; - break; - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - /* The count is in a single digit. */ - qualifiers = (*mangled)[1] - '0'; - - /* If there is an underscore after the digit, skip it. This is - said to be for ARM-qualified names, but the ARM makes no - mention of such an underscore. Perhaps cfront uses one. 
*/ - if ((*mangled)[2] == '_') - { - (*mangled)++; - } - (*mangled) += 2; - break; - - case '0': - default: - success = 0; - } - - if (!success) - { - string_delete (&last_name); - string_delete (&temp); - return success; - } - - /* Pick off the names and collect them in the temp buffer in the order - in which they are found, separated by '::'. */ - - while (qualifiers-- > 0) - { - int remember_K = 1; - string_clear (&last_name); - - if (*mangled[0] == '_') - (*mangled)++; - - if (*mangled[0] == 't') - { - /* Here we always append to TEMP since we will want to use - the template name without the template parameters as a - constructor or destructor name. The appropriate - (parameter-less) value is returned by demangle_template - in LAST_NAME. We do not remember the template type here, - in order to match the G++ mangling algorithm. */ - success = demangle_template(work, mangled, &temp, - &last_name, 1, 0); - if (!success) - break; - } - else if (*mangled[0] == 'K') - { - int idx; - (*mangled)++; - idx = consume_count_with_underscores (mangled); - if (idx == -1 || idx >= work->numk) - success = 0; - else - string_append (&temp, work->ktypevec[idx]); - remember_K = 0; - - if (!success) break; - } - else - { - if (EDG_DEMANGLING) - { - int namelength; - /* Now recursively demangle the qualifier - * This is necessary to deal with templates in - * mangling styles like EDG */ - namelength = consume_count (mangled); - if (namelength == -1) - { - success = 0; - break; - } - recursively_demangle(work, mangled, &temp, namelength); - } - else - { - string temp_last_name; - string_init (&temp_last_name); - success = do_type (work, mangled, &temp_last_name); - if (!success) - { - string_delete (&temp_last_name); - break; - } - string_appends (&temp, &temp_last_name); - string_appends (&last_name, &temp_last_name); - string_delete (&temp_last_name); - } - } - - if (remember_K) - remember_Ktype (work, temp.b, LEN_STRING (&temp)); - - if (qualifiers > 0) - string_append (&temp, 
SCOPE_STRING (work)); - } - - remember_Btype (work, temp.b, LEN_STRING (&temp), bindex); - - /* If we are using the result as a function name, we need to append - the appropriate '::' separated constructor or destructor name. - We do this here because this is the most convenient place, where - we already have a pointer to the name and the length of the name. */ - - if (isfuncname) - { - string_append (&temp, SCOPE_STRING (work)); - if (work -> destructor & 1) - string_append (&temp, "~"); - string_appends (&temp, &last_name); - } - - /* Now either prepend the temp buffer to the result, or append it, - depending upon the state of the append flag. */ - - if (append) - string_appends (result, &temp); - else - { - if (!STRING_EMPTY (result)) - string_append (&temp, SCOPE_STRING (work)); - string_prepends (result, &temp); - } - - string_delete (&last_name); - string_delete (&temp); - return (success); -} - -/* - -LOCAL FUNCTION - - get_count -- convert an ascii count to integer, consuming tokens - -SYNOPSIS - - static int - get_count (const char **type, int *count) - -DESCRIPTION - - Assume that *type points at a count in a mangled name; set - *count to its value, and set *type to the next character after - the count. There are some weird rules in effect here. - - If *type does not point at a string of digits, return zero. - - If *type points at a string of digits followed by an - underscore, set *count to their value as an integer, advance - *type to point *after the underscore, and return 1. - - If *type points at a string of digits not followed by an - underscore, consume only the first digit. Set *count to its - value as an integer, leave *type pointing after that digit, - and return 1. 
- - The excuse for this odd behavior: in the ARM and HP demangling - styles, a type can be followed by a repeat count of the form - `Nxy', where: - - `x' is a single digit specifying how many additional copies - of the type to append to the argument list, and - - `y' is one or more digits, specifying the zero-based index of - the first repeated argument in the list. Yes, as you're - unmangling the name you can figure this out yourself, but - it's there anyway. - - So, for example, in `bar__3fooFPiN51', the first argument is a - pointer to an integer (`Pi'), and then the next five arguments - are the same (`N5'), and the first repeat is the function's - second argument (`1'). -*/ - -static int -get_count (type, count) - const char **type; - int *count; -{ - const char *p; - int n; - - if (!ISDIGIT ((unsigned char)**type)) - return (0); - else - { - *count = **type - '0'; - (*type)++; - if (ISDIGIT ((unsigned char)**type)) - { - p = *type; - n = *count; - do - { - n *= 10; - n += *p - '0'; - p++; - } - while (ISDIGIT ((unsigned char)*p)); - if (*p == '_') - { - *type = p + 1; - *count = n; - } - } - } - return (1); -} - -/* RESULT will be initialised here; it will be freed on failure. The - value returned is really a type_kind_t. */ - -static int -do_type (work, mangled, result) - struct work_stuff *work; - const char **mangled; - string *result; -{ - int n; - int done; - int success; - string decl; - const char *remembered_type; - int type_quals; - string btype; - type_kind_t tk = tk_none; - - string_init (&btype); - string_init (&decl); - string_init (result); - - done = 0; - success = 1; - while (success && !done) - { - int member; - switch (**mangled) - { - - /* A pointer type */ - case 'P': - case 'p': - (*mangled)++; - if (! 
(work -> options & DMGL_JAVA)) - string_prepend (&decl, "*"); - if (tk == tk_none) - tk = tk_pointer; - break; - - /* A reference type */ - case 'R': - (*mangled)++; - string_prepend (&decl, "&"); - if (tk == tk_none) - tk = tk_reference; - break; - - /* An array */ - case 'A': - { - ++(*mangled); - if (!STRING_EMPTY (&decl) - && (decl.b[0] == '*' || decl.b[0] == '&')) - { - string_prepend (&decl, "("); - string_append (&decl, ")"); - } - string_append (&decl, "["); - if (**mangled != '_') - success = demangle_template_value_parm (work, mangled, &decl, - tk_integral); - if (**mangled == '_') - ++(*mangled); - string_append (&decl, "]"); - break; - } - - /* A back reference to a previously seen type */ - case 'T': - (*mangled)++; - if (!get_count (mangled, &n) || n >= work -> ntypes) - { - success = 0; - } - else - { - remembered_type = work -> typevec[n]; - mangled = &remembered_type; - } - break; - - /* A function */ - case 'F': - (*mangled)++; - if (!STRING_EMPTY (&decl) - && (decl.b[0] == '*' || decl.b[0] == '&')) - { - string_prepend (&decl, "("); - string_append (&decl, ")"); - } - /* After picking off the function args, we expect to either find the - function return type (preceded by an '_') or the end of the - string. */ - if (!demangle_nested_args (work, mangled, &decl) - || (**mangled != '_' && **mangled != '\0')) - { - success = 0; - break; - } - if (success && (**mangled == '_')) - (*mangled)++; - break; - - case 'M': - case 'O': - { - type_quals = TYPE_UNQUALIFIED; - - member = **mangled == 'M'; - (*mangled)++; - - string_append (&decl, ")"); - - /* We don't need to prepend `::' for a qualified name; - demangle_qualified will do that for us. 
*/ - if (**mangled != 'Q') - string_prepend (&decl, SCOPE_STRING (work)); - - if (ISDIGIT ((unsigned char)**mangled)) - { - n = consume_count (mangled); - if (n == -1 - || (int) strlen (*mangled) < n) - { - success = 0; - break; - } - string_prependn (&decl, *mangled, n); - *mangled += n; - } - else if (**mangled == 'X' || **mangled == 'Y') - { - string temp; - do_type (work, mangled, &temp); - string_prepends (&decl, &temp); - } - else if (**mangled == 't') - { - string temp; - string_init (&temp); - success = demangle_template (work, mangled, &temp, - NULL, 1, 1); - if (success) - { - string_prependn (&decl, temp.b, temp.p - temp.b); - string_clear (&temp); - } - else - break; - } - else if (**mangled == 'Q') - { - success = demangle_qualified (work, mangled, &decl, - /*isfuncnam=*/0, - /*append=*/0); - if (!success) - break; - } - else - { - success = 0; - break; - } - - string_prepend (&decl, "("); - if (member) - { - switch (**mangled) - { - case 'C': - case 'V': - case 'u': - type_quals |= code_for_qualifier (**mangled); - (*mangled)++; - break; - - default: - break; - } - - if (*(*mangled)++ != 'F') - { - success = 0; - break; - } - } - if ((member && !demangle_nested_args (work, mangled, &decl)) - || **mangled != '_') - { - success = 0; - break; - } - (*mangled)++; - if (! PRINT_ANSI_QUALIFIERS) - { - break; - } - if (type_quals != TYPE_UNQUALIFIED) - { - APPEND_BLANK (&decl); - string_append (&decl, qualifier_string (type_quals)); - } - break; - } - case 'G': - (*mangled)++; - break; - - case 'C': - case 'V': - case 'u': - if (PRINT_ANSI_QUALIFIERS) - { - if (!STRING_EMPTY (&decl)) - string_prepend (&decl, " "); - - string_prepend (&decl, demangle_qualifier (**mangled)); - } - (*mangled)++; - break; - /* - } - */ - - /* fall through */ - default: - done = 1; - break; - } - } - - if (success) switch (**mangled) - { - /* A qualified name, such as "Outer::Inner". 
*/ - case 'Q': - case 'K': - { - success = demangle_qualified (work, mangled, result, 0, 1); - break; - } - - /* A back reference to a previously seen squangled type */ - case 'B': - (*mangled)++; - if (!get_count (mangled, &n) || n >= work -> numb) - success = 0; - else - string_append (result, work->btypevec[n]); - break; - - case 'X': - case 'Y': - /* A template parm. We substitute the corresponding argument. */ - { - int idx; - - (*mangled)++; - idx = consume_count_with_underscores (mangled); - - if (idx == -1 - || (work->tmpl_argvec && idx >= work->ntmpl_args) - || consume_count_with_underscores (mangled) == -1) - { - success = 0; - break; - } - - if (work->tmpl_argvec) - string_append (result, work->tmpl_argvec[idx]); - else - string_append_template_idx (result, idx); - - success = 1; - } - break; - - default: - success = demangle_fund_type (work, mangled, result); - if (tk == tk_none) - tk = (type_kind_t) success; - break; - } - - if (success) - { - if (!STRING_EMPTY (&decl)) - { - string_append (result, " "); - string_appends (result, &decl); - } - } - else - string_delete (result); - string_delete (&decl); - - if (success) - /* Assume an integral type, if we're not sure. */ - return (int) ((tk == tk_none) ? tk_integral : tk); - else - return 0; -} - -/* Given a pointer to a type string that represents a fundamental type - argument (int, long, unsigned int, etc) in TYPE, a pointer to the - string in which the demangled output is being built in RESULT, and - the WORK structure, decode the types and add them to the result. - - For example: - - "Ci" => "const int" - "Sl" => "signed long" - "CUs" => "const unsigned short" - - The value returned is really a type_kind_t. 
*/ - -static int -demangle_fund_type (work, mangled, result) - struct work_stuff *work; - const char **mangled; - string *result; -{ - int done = 0; - int success = 1; - char buf[10]; - unsigned int dec = 0; - string btype; - type_kind_t tk = tk_integral; - - string_init (&btype); - - /* First pick off any type qualifiers. There can be more than one. */ - - while (!done) - { - switch (**mangled) - { - case 'C': - case 'V': - case 'u': - if (PRINT_ANSI_QUALIFIERS) - { - if (!STRING_EMPTY (result)) - string_prepend (result, " "); - string_prepend (result, demangle_qualifier (**mangled)); - } - (*mangled)++; - break; - case 'U': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "unsigned"); - break; - case 'S': /* signed char only */ - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "signed"); - break; - case 'J': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "__complex"); - break; - default: - done = 1; - break; - } - } - - /* Now pick off the fundamental type. There can be only one. 
*/ - - switch (**mangled) - { - case '\0': - case '_': - break; - case 'v': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "void"); - break; - case 'x': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "long long"); - break; - case 'l': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "long"); - break; - case 'i': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "int"); - break; - case 's': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "short"); - break; - case 'b': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "bool"); - tk = tk_bool; - break; - case 'c': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "char"); - tk = tk_char; - break; - case 'w': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "wchar_t"); - tk = tk_char; - break; - case 'r': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "long double"); - tk = tk_real; - break; - case 'd': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "double"); - tk = tk_real; - break; - case 'f': - (*mangled)++; - APPEND_BLANK (result); - string_append (result, "float"); - tk = tk_real; - break; - case 'G': - (*mangled)++; - if (!ISDIGIT ((unsigned char)**mangled)) - { - success = 0; - break; - } - case 'I': - (*mangled)++; - if (**mangled == '_') - { - int i; - (*mangled)++; - for (i = 0; - i < (long) sizeof (buf) - 1 && **mangled && **mangled != '_'; - (*mangled)++, i++) - buf[i] = **mangled; - if (**mangled != '_') - { - success = 0; - break; - } - buf[i] = '\0'; - (*mangled)++; - } - else - { - strncpy (buf, *mangled, 2); - buf[2] = '\0'; - *mangled += min (strlen (*mangled), 2); - } - /*sscanf (buf, "%x", &dec); - sprintf (buf, "int%u_t", dec);*/ - sprintf (buf, "i_xx_t"); - APPEND_BLANK (result); - string_append (result, buf); - break; - - /* fall through */ - /* An explicit type, such as "6mytype" or "7integer" */ - case '0': - case '1': - 
case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - int bindex = register_Btype (work); - string loc_btype; - string_init (&loc_btype); - if (demangle_class_name (work, mangled, &loc_btype)) { - remember_Btype (work, loc_btype.b, LEN_STRING (&loc_btype), bindex); - APPEND_BLANK (result); - string_appends (result, &loc_btype); - } - else - success = 0; - string_delete (&loc_btype); - break; - } - case 't': - { - success = demangle_template (work, mangled, &btype, 0, 1, 1); - string_appends (result, &btype); - break; - } - default: - success = 0; - break; - } - - string_delete (&btype); - - return success ? ((int) tk) : 0; -} - - -/* Handle a template's value parameter for HP aCC (extension from ARM) - **mangled points to 'S' or 'U' */ - -static int -do_hpacc_template_const_value (work, mangled, result) - struct work_stuff *work ATTRIBUTE_UNUSED; - const char **mangled; - string *result; -{ - int unsigned_const; - - if (**mangled != 'U' && **mangled != 'S') - return 0; - - unsigned_const = (**mangled == 'U'); - - (*mangled)++; - - switch (**mangled) - { - case 'N': - string_append (result, "-"); - /* fall through */ - case 'P': - (*mangled)++; - break; - case 'M': - /* special case for -2^31 */ - string_append (result, "-2147483648"); - (*mangled)++; - return 1; - default: - return 0; - } - - /* We have to be looking at an integer now */ - if (!(ISDIGIT ((unsigned char)**mangled))) - return 0; - - /* We only deal with integral values for template - parameters -- so it's OK to look only for digits */ - while (ISDIGIT ((unsigned char)**mangled)) - { - char_str[0] = **mangled; - string_append (result, char_str); - (*mangled)++; - } - - if (unsigned_const) - string_append (result, "U"); - - /* FIXME? Some day we may have 64-bit (or larger :-) ) constants - with L or LL suffixes. 
pai/1997-09-03 */ - - return 1; /* success */ -} - -/* Handle a template's literal parameter for HP aCC (extension from ARM) - **mangled is pointing to the 'A' */ - -static int -do_hpacc_template_literal (work, mangled, result) - struct work_stuff *work; - const char **mangled; - string *result; -{ - int literal_len = 0; - char * recurse; - char * recurse_dem; - - if (**mangled != 'A') - return 0; - - (*mangled)++; - - literal_len = consume_count (mangled); - - if (literal_len <= 0) - return 0; - - /* Literal parameters are names of arrays, functions, etc. and the - canonical representation uses the address operator */ - string_append (result, "&"); - - /* Now recursively demangle the literal name */ - recurse = (char *) xmalloc (literal_len + 1); - memcpy (recurse, *mangled, literal_len); - recurse[literal_len] = '\000'; - - recurse_dem = VG_(cplus_demangle) (recurse, work->options); - - if (recurse_dem) - { - string_append (result, recurse_dem); - free (recurse_dem); - } - else - { - string_appendn (result, *mangled, literal_len); - } - (*mangled) += literal_len; - free (recurse); - - return 1; -} - -static int -snarf_numeric_literal (args, arg) - const char ** args; - string * arg; -{ - if (**args == '-') - { - char_str[0] = '-'; - string_append (arg, char_str); - (*args)++; - } - else if (**args == '+') - (*args)++; - - if (!ISDIGIT ((unsigned char)**args)) - return 0; - - while (ISDIGIT ((unsigned char)**args)) - { - char_str[0] = **args; - string_append (arg, char_str); - (*args)++; - } - - return 1; -} - -/* Demangle the next argument, given by MANGLED into RESULT, which - *should be an uninitialized* string. It will be initialized here, - and free'd should anything go wrong. */ - -static int -do_arg (work, mangled, result) - struct work_stuff *work; - const char **mangled; - string *result; -{ - /* Remember where we started so that we can record the type, for - non-squangling type remembering. 
*/ - const char *start = *mangled; - string temp_result; - - string_init (result); - string_init (&temp_result); - - if (work->nrepeats > 0) - { - --work->nrepeats; - - if (work->previous_argument == 0) - return 0; - - /* We want to reissue the previous type in this argument list. */ - string_appends (result, work->previous_argument); - return 1; - } - - if (**mangled == 'n') - { - /* A squangling-style repeat. */ - (*mangled)++; - work->nrepeats = consume_count(mangled); - - if (work->nrepeats <= 0) - /* This was not a repeat count after all. */ - return 0; - - if (work->nrepeats > 9) - { - if (**mangled != '_') - /* The repeat count should be followed by an '_' in this - case. */ - return 0; - else - (*mangled)++; - } - - /* Now, the repeat is all set up. */ - return do_arg (work, mangled, result); - } - - /* Save the result in WORK->previous_argument so that we can find it - if it's repeated. Note that saving START is not good enough: we - do not want to add additional types to the back-referenceable - type vector when processing a repeated type. 
*/ - if (work->previous_argument) - string_clear (work->previous_argument); - else - { - work->previous_argument = (string*) xmalloc (sizeof (string)); - string_init (work->previous_argument); - } - - if (!do_type (work, mangled, &temp_result)) - { - string_delete (&temp_result); - return 0; - } - string_appends (work->previous_argument, &temp_result); - string_delete (&temp_result); - - string_appends (result, work->previous_argument); - - remember_type (work, start, *mangled - start); - return 1; -} - -static void -remember_type (work, start, len) - struct work_stuff *work; - const char *start; - int len; -{ - char *tem; - - if (work->forgetting_types) - return; - - if (work -> ntypes >= work -> typevec_size) - { - if (work -> typevec_size == 0) - { - work -> typevec_size = 3; - work -> typevec - = (char **) xmalloc (sizeof (char *) * work -> typevec_size); - } - else - { - work -> typevec_size *= 2; - work -> typevec - = (char **) xrealloc ((char *)work -> typevec, - sizeof (char *) * work -> typevec_size); - } - } - tem = xmalloc (len + 1); - memcpy (tem, start, len); - tem[len] = '\0'; - work -> typevec[work -> ntypes++] = tem; -} - - -/* Remember a K type class qualifier. */ -static void -remember_Ktype (work, start, len) - struct work_stuff *work; - const char *start; - int len; -{ - char *tem; - - if (work -> numk >= work -> ksize) - { - if (work -> ksize == 0) - { - work -> ksize = 5; - work -> ktypevec - = (char **) xmalloc (sizeof (char *) * work -> ksize); - } - else - { - work -> ksize *= 2; - work -> ktypevec - = (char **) xrealloc ((char *)work -> ktypevec, - sizeof (char *) * work -> ksize); - } - } - tem = xmalloc (len + 1); - memcpy (tem, start, len); - tem[len] = '\0'; - work -> ktypevec[work -> numk++] = tem; -} - -/* Register a B code, and get an index for it. 
B codes are registered - as they are seen, rather than as they are completed, so map > - registers map > as B0, and temp as B1 */ - -static int -register_Btype (work) - struct work_stuff *work; -{ - int ret; - - if (work -> numb >= work -> bsize) - { - if (work -> bsize == 0) - { - work -> bsize = 5; - work -> btypevec - = (char **) xmalloc (sizeof (char *) * work -> bsize); - } - else - { - work -> bsize *= 2; - work -> btypevec - = (char **) xrealloc ((char *)work -> btypevec, - sizeof (char *) * work -> bsize); - } - } - ret = work -> numb++; - work -> btypevec[ret] = NULL; - return(ret); -} - -/* Store a value into a previously registered B code type. */ - -static void -remember_Btype (work, start, len, ind) - struct work_stuff *work; - const char *start; - int len, ind; -{ - char *tem; - - tem = xmalloc (len + 1); - memcpy (tem, start, len); - tem[len] = '\0'; - work -> btypevec[ind] = tem; -} - -/* Lose all the info related to B and K type codes. */ -static void -forget_B_and_K_types (work) - struct work_stuff *work; -{ - int i; - - while (work -> numk > 0) - { - i = --(work -> numk); - if (work -> ktypevec[i] != NULL) - { - free (work -> ktypevec[i]); - work -> ktypevec[i] = NULL; - } - } - - while (work -> numb > 0) - { - i = --(work -> numb); - if (work -> btypevec[i] != NULL) - { - free (work -> btypevec[i]); - work -> btypevec[i] = NULL; - } - } -} -/* Forget the remembered types, but not the type vector itself. */ - -static void -forget_types (work) - struct work_stuff *work; -{ - int i; - - while (work -> ntypes > 0) - { - i = --(work -> ntypes); - if (work -> typevec[i] != NULL) - { - free (work -> typevec[i]); - work -> typevec[i] = NULL; - } - } -} - -/* Process the argument list part of the signature, after any class spec - has been consumed, as well as the first 'F' character (if any). For - example: - - "__als__3fooRT0" => process "RT0" - "complexfunc5__FPFPc_PFl_i" => process "PFPc_PFl_i" - - DECLP must be already initialised, usually non-empty. 
It won't be freed - on failure. - - Note that g++ differs significantly from ARM and lucid style mangling - with regards to references to previously seen types. For example, given - the source fragment: - - class foo { - public: - foo::foo (int, foo &ia, int, foo &ib, int, foo &ic); - }; - - foo::foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } - void foo (int, foo &ia, int, foo &ib, int, foo &ic) { ia = ib = ic; } - - g++ produces the names: - - __3fooiRT0iT2iT2 - foo__FiR3fooiT1iT1 - - while lcc (and presumably other ARM style compilers as well) produces: - - foo__FiR3fooT1T2T1T2 - __ct__3fooFiR3fooT1T2T1T2 - - Note that g++ bases its type numbers starting at zero and counts all - previously seen types, while lucid/ARM bases its type numbers starting - at one and only considers types after it has seen the 'F' character - indicating the start of the function args. For lucid/ARM style, we - account for this difference by discarding any previously seen types when - we see the 'F' character, and subtracting one from the type number - reference. - - */ - -static int -demangle_args (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - string arg; - int need_comma = 0; - int r; - int t; - const char *tem; - char temptype; - - if (PRINT_ARG_TYPES) - { - string_append (declp, "("); - if (**mangled == '\0') - { - string_append (declp, "void"); - } - } - - while ((**mangled != '_' && **mangled != '\0' && **mangled != 'e') - || work->nrepeats > 0) - { - if ((**mangled == 'N') || (**mangled == 'T')) - { - temptype = *(*mangled)++; - - if (temptype == 'N') - { - if (!get_count (mangled, &r)) - { - return (0); - } - } - else - { - r = 1; - } - if ((HP_DEMANGLING || ARM_DEMANGLING || EDG_DEMANGLING) && work -> ntypes >= 10) - { - /* If we have 10 or more types we might have more than a 1 digit - index so we'll have to consume the whole count here. 
This - will lose if the next thing is a type name preceded by a - count but it's impossible to demangle that case properly - anyway. Eg if we already have 12 types is T12Pc "(..., type1, - Pc, ...)" or "(..., type12, char *, ...)" */ - if ((t = consume_count(mangled)) <= 0) - { - return (0); - } - } - else - { - if (!get_count (mangled, &t)) - { - return (0); - } - } - if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) - { - t--; - } - /* Validate the type index. Protect against illegal indices from - malformed type strings. */ - if ((t < 0) || (t >= work -> ntypes)) - { - return (0); - } - while (work->nrepeats > 0 || --r >= 0) - { - tem = work -> typevec[t]; - if (need_comma && PRINT_ARG_TYPES) - { - string_append (declp, ", "); - } - if (!do_arg (work, &tem, &arg)) - { - return (0); - } - if (PRINT_ARG_TYPES) - { - string_appends (declp, &arg); - } - string_delete (&arg); - need_comma = 1; - } - } - else - { - if (need_comma && PRINT_ARG_TYPES) - string_append (declp, ", "); - if (!do_arg (work, mangled, &arg)) - { - string_delete (&arg); - return (0); - } - if (PRINT_ARG_TYPES) - string_appends (declp, &arg); - string_delete (&arg); - need_comma = 1; - } - } - - if (**mangled == 'e') - { - (*mangled)++; - if (PRINT_ARG_TYPES) - { - if (need_comma) - { - string_append (declp, ","); - } - string_append (declp, "..."); - } - } - - if (PRINT_ARG_TYPES) - { - string_append (declp, ")"); - } - return (1); -} - -/* Like demangle_args, but for demangling the argument lists of function - and method pointers or references, not top-level declarations. */ - -static int -demangle_nested_args (work, mangled, declp) - struct work_stuff *work; - const char **mangled; - string *declp; -{ - string* saved_previous_argument; - int result; - int saved_nrepeats; - - /* The G++ name-mangling algorithm does not remember types on nested - argument lists, unless -fsquangling is used, and in that case the - type vector updated by remember_type is not used. 
So, we turn - off remembering of types here. */ - ++work->forgetting_types; - - /* For the repeat codes used with -fsquangling, we must keep track of - the last argument. */ - saved_previous_argument = work->previous_argument; - saved_nrepeats = work->nrepeats; - work->previous_argument = 0; - work->nrepeats = 0; - - /* Actually demangle the arguments. */ - result = demangle_args (work, mangled, declp); - - /* Restore the previous_argument field. */ - if (work->previous_argument) - { - string_delete (work->previous_argument); - free ((char*) work->previous_argument); - } - work->previous_argument = saved_previous_argument; - --work->forgetting_types; - work->nrepeats = saved_nrepeats; - - return result; -} - -static void -demangle_function_name (work, mangled, declp, scan) - struct work_stuff *work; - const char **mangled; - string *declp; - const char *scan; -{ - size_t i; - string type; - const char *tem; - - string_appendn (declp, (*mangled), scan - (*mangled)); - string_need (declp, 1); - *(declp -> p) = '\0'; - - /* Consume the function name, including the "__" separating the name - from the signature. We are guaranteed that SCAN points to the - separator. */ - - (*mangled) = scan + 2; - /* We may be looking at an instantiation of a template function: - foo__Xt1t2_Ft3t4, where t1, t2, ... are template arguments and a - following _F marks the start of the function arguments. Handle - the template arguments first. */ - - if (HP_DEMANGLING && (**mangled == 'X')) - { - demangle_arm_hp_template (work, mangled, 0, declp); - /* This leaves MANGLED pointing to the 'F' marking func args */ - } - - if (LUCID_DEMANGLING || ARM_DEMANGLING || HP_DEMANGLING || EDG_DEMANGLING) - { - - /* See if we have an ARM style constructor or destructor operator. - If so, then just record it, clear the decl, and return. - We can't build the actual constructor/destructor decl until later, - when we recover the class name from the signature. 
*/ - - if (strcmp (declp -> b, "__ct") == 0) - { - work -> constructor += 1; - string_clear (declp); - return; - } - else if (strcmp (declp -> b, "__dt") == 0) - { - work -> destructor += 1; - string_clear (declp); - return; - } - } - - if (declp->p - declp->b >= 3 - && declp->b[0] == 'o' - && declp->b[1] == 'p' - && strchr (cplus_markers, declp->b[2]) != NULL) - { - /* see if it's an assignment expression */ - if (declp->p - declp->b >= 10 /* op$assign_ */ - && memcmp (declp->b + 3, "assign_", 7) == 0) - { - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - int len = declp->p - declp->b - 10; - if ((int) strlen (optable[i].in) == len - && memcmp (optable[i].in, declp->b + 10, len) == 0) - { - string_clear (declp); - string_append (declp, "operator"); - string_append (declp, optable[i].out); - string_append (declp, "="); - break; - } - } - } - else - { - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - int len = declp->p - declp->b - 3; - if ((int) strlen (optable[i].in) == len - && memcmp (optable[i].in, declp->b + 3, len) == 0) - { - string_clear (declp); - string_append (declp, "operator"); - string_append (declp, optable[i].out); - break; - } - } - } - } - else if (declp->p - declp->b >= 5 && memcmp (declp->b, "type", 4) == 0 - && strchr (cplus_markers, declp->b[4]) != NULL) - { - /* type conversion operator */ - tem = declp->b + 5; - if (do_type (work, &tem, &type)) - { - string_clear (declp); - string_append (declp, "operator "); - string_appends (declp, &type); - string_delete (&type); - } - } - else if (declp->b[0] == '_' && declp->b[1] == '_' - && declp->b[2] == 'o' && declp->b[3] == 'p') - { - /* ANSI. */ - /* type conversion operator. 
*/ - tem = declp->b + 4; - if (do_type (work, &tem, &type)) - { - string_clear (declp); - string_append (declp, "operator "); - string_appends (declp, &type); - string_delete (&type); - } - } - else if (declp->b[0] == '_' && declp->b[1] == '_' - && ISLOWER((unsigned char)declp->b[2]) - && ISLOWER((unsigned char)declp->b[3])) - { - if (declp->b[4] == '\0') - { - /* Operator. */ - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - if (strlen (optable[i].in) == 2 - && memcmp (optable[i].in, declp->b + 2, 2) == 0) - { - string_clear (declp); - string_append (declp, "operator"); - string_append (declp, optable[i].out); - break; - } - } - } - else - { - if (declp->b[2] == 'a' && declp->b[5] == '\0') - { - /* Assignment. */ - for (i = 0; i < ARRAY_SIZE (optable); i++) - { - if (strlen (optable[i].in) == 3 - && memcmp (optable[i].in, declp->b + 2, 3) == 0) - { - string_clear (declp); - string_append (declp, "operator"); - string_append (declp, optable[i].out); - break; - } - } - } - } - } -} - -/* a mini string-handling package */ - -static void -string_need (s, n) - string *s; - int n; -{ - int tem; - - if (s->b == NULL) - { - if (n < 32) - { - n = 32; - } - s->p = s->b = xmalloc (n); - s->e = s->b + n; - } - else if (s->e - s->p < n) - { - tem = s->p - s->b; - n += tem; - n *= 2; - s->b = xrealloc (s->b, n); - s->p = s->b + tem; - s->e = s->b + n; - } -} - -static void -string_delete (s) - string *s; -{ - if (s->b != NULL) - { - free (s->b); - s->b = s->e = s->p = NULL; - } -} - -static void -string_init (s) - string *s; -{ - s->b = s->p = s->e = NULL; -} - -static void -string_clear (s) - string *s; -{ - s->p = s->b; -} - -#if 0 - -static int -string_empty (s) - string *s; -{ - return (s->b == s->p); -} - -#endif - -static void -string_append (p, s) - string *p; - const char *s; -{ - int n; - if (s == NULL || *s == '\0') - return; - n = strlen (s); - string_need (p, n); - memcpy (p->p, s, n); - p->p += n; -} - -static void -string_appends (p, s) - string *p, *s; -{ - int 
n; - - if (s->b != s->p) - { - n = s->p - s->b; - string_need (p, n); - memcpy (p->p, s->b, n); - p->p += n; - } -} - -static void -string_appendn (p, s, n) - string *p; - const char *s; - int n; -{ - if (n != 0) - { - string_need (p, n); - memcpy (p->p, s, n); - p->p += n; - } -} - -static void -string_prepend (p, s) - string *p; - const char *s; -{ - if (s != NULL && *s != '\0') - { - string_prependn (p, s, strlen (s)); - } -} - -static void -string_prepends (p, s) - string *p, *s; -{ - if (s->b != s->p) - { - string_prependn (p, s->b, s->p - s->b); - } -} - -static void -string_prependn (p, s, n) - string *p; - const char *s; - int n; -{ - char *q; - - if (n != 0) - { - string_need (p, n); - for (q = p->p - 1; q >= p->b; q--) - { - q[n] = q[0]; - } - memcpy (p->b, s, n); - p->p += n; - } -} - -static void -string_append_template_idx (s, idx) - string *s; - int idx; -{ - char buf[INTBUF_SIZE + 1 /* 'T' */]; - sprintf(buf, "T%d", idx); - string_append (s, buf); -} - -/* To generate a standalone demangler program for testing purposes, - just compile and link this file with -DMAIN and libiberty.a. When - run, it demangles each command line arg, or each stdin string, and - prints the result on stdout. */ - -#ifdef MAIN - -#include "getopt.h" - -static const char *program_name; -static const char *program_version = VERSION; -static int flags = DMGL_PARAMS | DMGL_ANSI | DMGL_VERBOSE; - -static void demangle_it PARAMS ((char *)); -static void usage PARAMS ((FILE *, int)) ATTRIBUTE_NORETURN; -static void fatal PARAMS ((const char *)) ATTRIBUTE_NORETURN; -static void print_demangler_list PARAMS ((FILE *)); - -static void -demangle_it (mangled_name) - char *mangled_name; -{ - char *result; - - /* For command line args, also try to demangle type encodings. 
*/ - result = cplus_demangle (mangled_name, flags | DMGL_TYPES); - if (result == NULL) - { - printf ("%s\n", mangled_name); - } - else - { - printf ("%s\n", result); - free (result); - } -} - -static void -print_demangler_list (stream) - FILE *stream; -{ - const struct demangler_engine *demangler; - - fprintf (stream, "{%s", libiberty_demanglers->demangling_style_name); - - for (demangler = libiberty_demanglers + 1; - demangler->demangling_style != unknown_demangling; - ++demangler) - fprintf (stream, ",%s", demangler->demangling_style_name); - - fprintf (stream, "}"); -} - -static void -usage (stream, status) - FILE *stream; - int status; -{ - fprintf (stream, "\ -Usage: %s [-_] [-n] [--strip-underscores] [--no-strip-underscores] \n", - program_name); - - fprintf (stream, "\ - [-s "); - print_demangler_list (stream); - fprintf (stream, "]\n"); - - fprintf (stream, "\ - [--format "); - print_demangler_list (stream); - fprintf (stream, "]\n"); - - fprintf (stream, "\ - [--help] [--version] [arg...]\n"); - exit (status); -} - -#define MBUF_SIZE 32767 -char mbuffer[MBUF_SIZE]; - -/* Defined in the automatically-generated underscore.c. */ -extern int prepends_underscore; - -int strip_underscore = 0; - -static const struct option long_options[] = { - {"strip-underscores", no_argument, 0, '_'}, - {"format", required_argument, 0, 's'}, - {"help", no_argument, 0, 'h'}, - {"no-strip-underscores", no_argument, 0, 'n'}, - {"version", no_argument, 0, 'v'}, - {0, no_argument, 0, 0} -}; - -/* More 'friendly' abort that prints the line and file. - config.h can #define abort fancy_abort if you like that sort of thing. 
*/ - -void -fancy_abort () -{ - fatal ("Internal gcc abort."); -} - - -static const char * -standard_symbol_characters PARAMS ((void)); - -static const char * -hp_symbol_characters PARAMS ((void)); - -static const char * -gnu_v3_symbol_characters PARAMS ((void)); - -/* Return the string of non-alnum characters that may occur - as a valid symbol component, in the standard assembler symbol - syntax. */ - -static const char * -standard_symbol_characters () -{ - return "_$."; -} - - -/* Return the string of non-alnum characters that may occur - as a valid symbol name component in an HP object file. - - Note that, since HP's compiler generates object code straight from - C++ source, without going through an assembler, its mangled - identifiers can use all sorts of characters that no assembler would - tolerate, so the alphabet this function creates is a little odd. - Here are some sample mangled identifiers offered by HP: - - typeid*__XT24AddressIndExpClassMember_ - [Vftptr]key:__dt__32OrdinaryCompareIndExpClassMemberFv - __ct__Q2_9Elf64_Dyn18{unnamed.union.#1}Fv - - This still seems really weird to me, since nowhere else in this - file is there anything to recognize curly brackets, parens, etc. - I've talked with Srikanth , and he assures me - this is right, but I still strongly suspect that there's a - misunderstanding here. - - If we decide it's better for c++filt to use HP's assembler syntax - to scrape identifiers out of its input, here's the definition of - the symbol name syntax from the HP assembler manual: - - Symbols are composed of uppercase and lowercase letters, decimal - digits, dollar symbol, period (.), ampersand (&), pound sign(#) and - underscore (_). A symbol can begin with a letter, digit underscore or - dollar sign. If a symbol begins with a digit, it must contain a - non-digit character. - - So have fun. 
*/ -static const char * -hp_symbol_characters () -{ - return "_$.<>#,*&[]:(){}"; -} - - -/* Return the string of non-alnum characters that may occur - as a valid symbol component in the GNU C++ V3 ABI mangling - scheme. */ - -static const char * -gnu_v3_symbol_characters () -{ - return "_$."; -} - - -extern int main PARAMS ((int, char **)); - -int -main (argc, argv) - int argc; - char **argv; -{ - char *result; - int c; - const char *valid_symbols; - enum demangling_styles style = auto_demangling; - - program_name = argv[0]; - - strip_underscore = prepends_underscore; - - while ((c = getopt_long (argc, argv, "_ns:", long_options, (int *) 0)) != EOF) - { - switch (c) - { - case '?': - usage (stderr, 1); - break; - case 'h': - usage (stdout, 0); - case 'n': - strip_underscore = 0; - break; - case 'v': - printf ("GNU %s (C++ demangler), version %s\n", program_name, program_version); - return (0); - case '_': - strip_underscore = 1; - break; - case 's': - { - style = cplus_demangle_name_to_style (optarg); - if (style == unknown_demangling) - { - fprintf (stderr, "%s: unknown demangling style `%s'\n", - program_name, optarg); - return (1); - } - else - cplus_demangle_set_style (style); - } - break; - } - } - - if (optind < argc) - { - for ( ; optind < argc; optind++) - { - demangle_it (argv[optind]); - } - } - else - { - switch (current_demangling_style) - { - case gnu_demangling: - case lucid_demangling: - case arm_demangling: - case java_demangling: - case edg_demangling: - case gnat_demangling: - case auto_demangling: - valid_symbols = standard_symbol_characters (); - break; - case hp_demangling: - valid_symbols = hp_symbol_characters (); - break; - case gnu_v3_demangling: - valid_symbols = gnu_v3_symbol_characters (); - break; - default: - /* Folks should explicitly indicate the appropriate alphabet for - each demangling. Providing a default would allow the - question to go unconsidered. 
*/ - abort (); - } - - for (;;) - { - int i = 0; - c = getchar (); - /* Try to read a label. */ - while (c != EOF && (ISALNUM (c) || strchr (valid_symbols, c))) - { - if (i >= MBUF_SIZE-1) - break; - mbuffer[i++] = c; - c = getchar (); - } - if (i > 0) - { - int skip_first = 0; - - if (mbuffer[0] == '.' || mbuffer[0] == '$') - ++skip_first; - if (strip_underscore && mbuffer[skip_first] == '_') - ++skip_first; - - if (skip_first > i) - skip_first = i; - - mbuffer[i] = 0; - flags |= (int) style; - result = cplus_demangle (mbuffer + skip_first, flags); - if (result) - { - if (mbuffer[0] == '.') - putc ('.', stdout); - fputs (result, stdout); - free (result); - } - else - fputs (mbuffer, stdout); - - fflush (stdout); - } - if (c == EOF) - break; - putchar (c); - fflush (stdout); - } - } - - return (0); -} - -static void -fatal (str) - const char *str; -{ - fprintf (stderr, "%s: %s\n", program_name, str); - exit (1); -} - -PTR -xmalloc (size) - size_t size; -{ - register PTR value = (PTR) malloc (size); - if (value == 0) - fatal ("virtual memory exhausted"); - return value; -} - -PTR -xrealloc (ptr, size) - PTR ptr; - size_t size; -{ - register PTR value = (PTR) realloc (ptr, size); - if (value == 0) - fatal ("virtual memory exhausted"); - return value; -} -#endif /* main */ diff --git a/coregrind/demangle/demangle.h b/coregrind/demangle/demangle.h deleted file mode 100644 index 238ae3398a..0000000000 --- a/coregrind/demangle/demangle.h +++ /dev/null @@ -1,177 +0,0 @@ -/* Defs for interface to demanglers. - Copyright 1992, 1993, 1994, 1995, 1996, 1997, 1998, 2000, 2001 - Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
- - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - - -#if !defined (DEMANGLE_H) -#define DEMANGLE_H - -#include - -#define current_demangling_style VG_(current_demangling_style) - -/* Options passed to cplus_demangle (in 2nd parameter). */ - -#define DMGL_NO_OPTS 0 /* For readability... */ -#define DMGL_PARAMS (1 << 0) /* Include function args */ -#define DMGL_ANSI (1 << 1) /* Include const, volatile, etc */ -#define DMGL_JAVA (1 << 2) /* Demangle as Java rather than C++. */ - -#define DMGL_AUTO (1 << 8) -#define DMGL_GNU (1 << 9) -#define DMGL_LUCID (1 << 10) -#define DMGL_ARM (1 << 11) -#define DMGL_HP (1 << 12) /* For the HP aCC compiler; - same as ARM except for - template arguments, etc. */ -#define DMGL_EDG (1 << 13) -#define DMGL_GNU_V3 (1 << 14) -#define DMGL_GNAT (1 << 15) - -/* If none of these are set, use 'current_demangling_style' as the default. */ -#define DMGL_STYLE_MASK (DMGL_AUTO|DMGL_GNU|DMGL_LUCID|DMGL_ARM|DMGL_HP|DMGL_EDG|DMGL_GNU_V3|DMGL_JAVA|DMGL_GNAT) - -/* Enumeration of possible demangling styles. - - Lucid and ARM styles are still kept logically distinct, even though - they now both behave identically. The resulting style is actual the - union of both. I.E. either style recognizes both "__pt__" and "__rf__" - for operator "->", even though the first is lucid style and the second - is ARM style. (FIXME?) 
*/ - -extern enum demangling_styles -{ - no_demangling = -1, - unknown_demangling = 0, - auto_demangling = DMGL_AUTO, - gnu_demangling = DMGL_GNU, - lucid_demangling = DMGL_LUCID, - arm_demangling = DMGL_ARM, - hp_demangling = DMGL_HP, - edg_demangling = DMGL_EDG, - gnu_v3_demangling = DMGL_GNU_V3, - java_demangling = DMGL_JAVA, - gnat_demangling = DMGL_GNAT -} current_demangling_style; - -/* Define string names for the various demangling styles. */ - -#define NO_DEMANGLING_STYLE_STRING "none" -#define AUTO_DEMANGLING_STYLE_STRING "auto" -#define GNU_DEMANGLING_STYLE_STRING "gnu" -#define LUCID_DEMANGLING_STYLE_STRING "lucid" -#define ARM_DEMANGLING_STYLE_STRING "arm" -#define HP_DEMANGLING_STYLE_STRING "hp" -#define EDG_DEMANGLING_STYLE_STRING "edg" -#define GNU_V3_DEMANGLING_STYLE_STRING "gnu-v3" -#define JAVA_DEMANGLING_STYLE_STRING "java" -#define GNAT_DEMANGLING_STYLE_STRING "gnat" - -/* Some macros to test what demangling style is active. */ - -#define CURRENT_DEMANGLING_STYLE current_demangling_style -#define AUTO_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_AUTO) -#define GNU_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU) -#define LUCID_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_LUCID) -#define ARM_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_ARM) -#define HP_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_HP) -#define EDG_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_EDG) -#define GNU_V3_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNU_V3) -#define JAVA_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_JAVA) -#define GNAT_DEMANGLING (((int) CURRENT_DEMANGLING_STYLE) & DMGL_GNAT) - -/* Provide information about the available demangle styles. This code is - pulled from gdb into libiberty because it is useful to binutils also. 
*/ - -extern const struct demangler_engine -{ - const char *const demangling_style_name; - const enum demangling_styles demangling_style; - const char *const demangling_style_doc; -} libiberty_demanglers[]; - -extern char * -VG_(cplus_demangle) PARAMS ((const char *mangled, int options)); - -/* -extern int -cplus_demangle_opname PARAMS ((const char *opname, char *result, int options)); -*/ - -/* -extern const char * -cplus_mangle_opname PARAMS ((const char *opname, int options)); -*/ - -/* Note: This sets global state. FIXME if you care about multi-threading. */ - -/* -extern void -set_cplus_marker_for_demangling PARAMS ((int ch)); -*/ - -/* -extern enum demangling_styles -cplus_demangle_set_style PARAMS ((enum demangling_styles style)); -*/ - -/* -extern enum demangling_styles -cplus_demangle_name_to_style PARAMS ((const char *name)); -*/ - -/* V3 ABI demangling entry points, defined in cp-demangle.c. */ -extern char* -VG_(cplus_demangle_v3) PARAMS ((const char* mangled)); - -extern char* -VG_(java_demangle_v3) PARAMS ((const char* mangled)); - - -enum gnu_v3_ctor_kinds { - gnu_v3_complete_object_ctor = 1, - gnu_v3_base_object_ctor, - gnu_v3_complete_object_allocating_ctor -}; - -/* Return non-zero iff NAME is the mangled form of a constructor name - in the G++ V3 ABI demangling style. Specifically, return an `enum - gnu_v3_ctor_kinds' value indicating what kind of constructor - it is. */ -/* -extern enum gnu_v3_ctor_kinds - is_gnu_v3_mangled_ctor PARAMS ((const char *name)); -*/ - - -enum gnu_v3_dtor_kinds { - gnu_v3_deleting_dtor = 1, - gnu_v3_complete_object_dtor, - gnu_v3_base_object_dtor -}; - -/* Return non-zero iff NAME is the mangled form of a destructor name - in the G++ V3 ABI demangling style. Specifically, return an `enum - gnu_v3_dtor_kinds' value, indicating what kind of destructor - it is. 
*/ -/* -extern enum gnu_v3_dtor_kinds - is_gnu_v3_mangled_dtor PARAMS ((const char *name)); -*/ - -#endif /* DEMANGLE_H */ diff --git a/coregrind/demangle/dyn-string.c b/coregrind/demangle/dyn-string.c deleted file mode 100644 index aaa7e36319..0000000000 --- a/coregrind/demangle/dyn-string.c +++ /dev/null @@ -1,439 +0,0 @@ -/* An abstract string datatype. - Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. - Contributed by Mark Mitchell (mark@markmitchell.com). - -This file is part of GNU CC. - -GNU CC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GNU CC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GNU CC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -#ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef HAVE_STRING_H -#include -#endif - -#ifdef HAVE_STDLIB_H -#include -#endif - -#include "vg_include.h" -#include "ansidecl.h" -#include "dyn-string.h" - -#ifndef STANDALONE -#define malloc(s) VG_(malloc)(VG_AR_DEMANGLE, s) -#define free(p) VG_(free)(VG_AR_DEMANGLE, p) -#define realloc(p,s) VG_(realloc)(VG_AR_DEMANGLE, p, s) -#endif - -/* If this file is being compiled for inclusion in the C++ runtime - library, as part of the demangler implementation, we don't want to - abort if an allocation fails. Instead, percolate an error code up - through the call chain. */ - -#ifdef IN_LIBGCC2 -#define RETURN_ON_ALLOCATION_FAILURE -#endif - -/* Performs in-place initialization of a dyn_string struct. 
This - function can be used with a dyn_string struct on the stack or - embedded in another object. The contents of of the string itself - are still dynamically allocated. The string initially is capable - of holding at least SPACE characeters, including the terminating - NUL. If SPACE is 0, it will silently be increated to 1. - - If RETURN_ON_ALLOCATION_FAILURE is defined and memory allocation - fails, returns 0. Otherwise returns 1. */ - -int -dyn_string_init (ds_struct_ptr, space) - struct dyn_string *ds_struct_ptr; - int space; -{ - /* We need at least one byte in which to store the terminating NUL. */ - if (space == 0) - space = 1; - -#ifdef RETURN_ON_ALLOCATION_FAILURE - ds_struct_ptr->s = (char *) malloc (space); - if (ds_struct_ptr->s == NULL) - return 0; -#else - ds_struct_ptr->s = (char *) malloc (space); -#endif - ds_struct_ptr->allocated = space; - ds_struct_ptr->length = 0; - ds_struct_ptr->s[0] = '\0'; - - return 1; -} - -/* Create a new dynamic string capable of holding at least SPACE - characters, including the terminating NUL. If SPACE is 0, it will - be silently increased to 1. If RETURN_ON_ALLOCATION_FAILURE is - defined and memory allocation fails, returns NULL. Otherwise - returns the newly allocated string. */ - -dyn_string_t -dyn_string_new (space) - int space; -{ - dyn_string_t result; -#ifdef RETURN_ON_ALLOCATION_FAILURE - result = (dyn_string_t) malloc (sizeof (struct dyn_string)); - if (result == NULL) - return NULL; - if (!dyn_string_init (result, space)) - { - free (result); - return NULL; - } -#else - result = (dyn_string_t) malloc (sizeof (struct dyn_string)); - dyn_string_init (result, space); -#endif - return result; -} - -/* Free the memory used by DS. */ - -void -dyn_string_delete (ds) - dyn_string_t ds; -{ - free (ds->s); - free (ds); -} - -/* Returns the contents of DS in a buffer allocated with malloc. It - is the caller's responsibility to deallocate the buffer using free. - DS is then set to the empty string. 
Deletes DS itself. */ - -char* -dyn_string_release (ds) - dyn_string_t ds; -{ - /* Store the old buffer. */ - char* result = ds->s; - /* The buffer is no longer owned by DS. */ - ds->s = NULL; - /* Delete DS. */ - free (ds); - /* Return the old buffer. */ - return result; -} - -/* Increase the capacity of DS so it can hold at least SPACE - characters, plus the terminating NUL. This function will not (at - present) reduce the capacity of DS. Returns DS on success. - - If RETURN_ON_ALLOCATION_FAILURE is defined and a memory allocation - operation fails, deletes DS and returns NULL. */ - -dyn_string_t -dyn_string_resize (ds, space) - dyn_string_t ds; - int space; -{ - int new_allocated = ds->allocated; - - /* Increase SPACE to hold the NUL termination. */ - ++space; - - /* Increase allocation by factors of two. */ - while (space > new_allocated) - new_allocated *= 2; - - if (new_allocated != ds->allocated) - { - ds->allocated = new_allocated; - /* We actually need more space. */ -#ifdef RETURN_ON_ALLOCATION_FAILURE - ds->s = (char *) realloc (ds->s, ds->allocated); - if (ds->s == NULL) - { - free (ds); - return NULL; - } -#else - ds->s = (char *) realloc (ds->s, ds->allocated); -#endif - } - - return ds; -} - -/* Sets the contents of DS to the empty string. */ - -void -dyn_string_clear (ds) - dyn_string_t ds; -{ - /* A dyn_string always has room for at least the NUL terminator. */ - ds->s[0] = '\0'; - ds->length = 0; -} - -/* Makes the contents of DEST the same as the contents of SRC. DEST - and SRC must be distinct. Returns 1 on success. On failure, if - RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ - -int -dyn_string_copy (dest, src) - dyn_string_t dest; - dyn_string_t src; -{ - if (dest == src) - VG_(panic) ("dyn_string_copy: src==dest"); - - /* Make room in DEST. */ - if (dyn_string_resize (dest, src->length) == NULL) - return 0; - /* Copy DEST into SRC. */ - VG_(strcpy) (dest->s, src->s); - /* Update the size of DEST. 
*/ - dest->length = src->length; - return 1; -} - -/* Copies SRC, a NUL-terminated string, into DEST. Returns 1 on - success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST - and returns 0. */ - -int -dyn_string_copy_cstr (dest, src) - dyn_string_t dest; - const char *src; -{ - int length = VG_(strlen) (src); - /* Make room in DEST. */ - if (dyn_string_resize (dest, length) == NULL) - return 0; - /* Copy DEST into SRC. */ - VG_(strcpy) (dest->s, src); - /* Update the size of DEST. */ - dest->length = length; - return 1; -} - -/* Inserts SRC at the beginning of DEST. DEST is expanded as - necessary. SRC and DEST must be distinct. Returns 1 on success. - On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and - returns 0. */ - -int -dyn_string_prepend (dest, src) - dyn_string_t dest; - dyn_string_t src; -{ - return dyn_string_insert (dest, 0, src); -} - -/* Inserts SRC, a NUL-terminated string, at the beginning of DEST. - DEST is expanded as necessary. Returns 1 on success. On failure, - if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ - -int -dyn_string_prepend_cstr (dest, src) - dyn_string_t dest; - const char *src; -{ - return dyn_string_insert_cstr (dest, 0, src); -} - -/* Inserts SRC into DEST starting at position POS. DEST is expanded - as necessary. SRC and DEST must be distinct. Returns 1 on - success. On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST - and returns 0. */ - -int -dyn_string_insert (dest, pos, src) - dyn_string_t dest; - int pos; - dyn_string_t src; -{ - int i; - - if (src == dest) - VG_(panic)( "dyn_string_insert: src==dest" ); - - if (dyn_string_resize (dest, dest->length + src->length) == NULL) - return 0; - /* Make room for the insertion. Be sure to copy the NUL. */ - for (i = dest->length; i >= pos; --i) - dest->s[i + src->length] = dest->s[i]; - /* Splice in the new stuff. */ - VG_(strncpy) (dest->s + pos, src->s, src->length); - /* Compute the new length. 
*/ - dest->length += src->length; - return 1; -} - -/* Inserts SRC, a NUL-terminated string, into DEST starting at - position POS. DEST is expanded as necessary. Returns 1 on - success. On failure, RETURN_ON_ALLOCATION_FAILURE, deletes DEST - and returns 0. */ - -int -dyn_string_insert_cstr (dest, pos, src) - dyn_string_t dest; - int pos; - const char *src; -{ - int i; - int length = VG_(strlen) (src); - - if (dyn_string_resize (dest, dest->length + length) == NULL) - return 0; - /* Make room for the insertion. Be sure to copy the NUL. */ - for (i = dest->length; i >= pos; --i) - dest->s[i + length] = dest->s[i]; - /* Splice in the new stuff. */ - VG_(strncpy) (dest->s + pos, src, length); - /* Compute the new length. */ - dest->length += length; - return 1; -} - -/* Inserts character C into DEST starting at position POS. DEST is - expanded as necessary. Returns 1 on success. On failure, - RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ - -int -dyn_string_insert_char (dest, pos, c) - dyn_string_t dest; - int pos; - int c; -{ - int i; - - if (dyn_string_resize (dest, dest->length + 1) == NULL) - return 0; - /* Make room for the insertion. Be sure to copy the NUL. */ - for (i = dest->length; i >= pos; --i) - dest->s[i + 1] = dest->s[i]; - /* Add the new character. */ - dest->s[pos] = c; - /* Compute the new length. */ - ++dest->length; - return 1; -} - -/* Append S to DS, resizing DS if necessary. Returns 1 on success. - On failure, if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and - returns 0. */ - -int -dyn_string_append (dest, s) - dyn_string_t dest; - dyn_string_t s; -{ - if (dyn_string_resize (dest, dest->length + s->length) == 0) - return 0; - VG_(strcpy) (dest->s + dest->length, s->s); - dest->length += s->length; - return 1; -} - -/* Append the NUL-terminated string S to DS, resizing DS if necessary. - Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, - deletes DEST and returns 0. 
*/ - -int -dyn_string_append_cstr (dest, s) - dyn_string_t dest; - const char *s; -{ - int len = VG_(strlen) (s); - - /* The new length is the old length plus the size of our string, plus - one for the null at the end. */ - if (dyn_string_resize (dest, dest->length + len) == NULL) - return 0; - VG_(strcpy) (dest->s + dest->length, s); - dest->length += len; - return 1; -} - -/* Appends C to the end of DEST. Returns 1 on success. On failiure, - if RETURN_ON_ALLOCATION_FAILURE, deletes DEST and returns 0. */ - -int -dyn_string_append_char (dest, c) - dyn_string_t dest; - int c; -{ - /* Make room for the extra character. */ - if (dyn_string_resize (dest, dest->length + 1) == NULL) - return 0; - /* Append the character; it will overwrite the old NUL. */ - dest->s[dest->length] = c; - /* Add a new NUL at the end. */ - dest->s[dest->length + 1] = '\0'; - /* Update the length. */ - ++(dest->length); - return 1; -} - -/* Sets the contents of DEST to the substring of SRC starting at START - and ending before END. START must be less than or equal to END, - and both must be between zero and the length of SRC, inclusive. - Returns 1 on success. On failure, if RETURN_ON_ALLOCATION_FAILURE, - deletes DEST and returns 0. */ - -int -dyn_string_substring (dest, src, start, end) - dyn_string_t dest; - dyn_string_t src; - int start; - int end; -{ - int i; - int length = end - start; - - /* - vg_assert (start > end || start > src->length || end > src->length); - */ - - /* Make room for the substring. */ - if (dyn_string_resize (dest, length) == NULL) - return 0; - /* Copy the characters in the substring, */ - for (i = length; --i >= 0; ) - dest->s[i] = src->s[start + i]; - /* NUL-terimate the result. */ - dest->s[length] = '\0'; - /* Record the length of the substring. */ - dest->length = length; - - return 1; -} - -/* Returns non-zero if DS1 and DS2 have the same contents. 
*/ - -int -dyn_string_eq (ds1, ds2) - dyn_string_t ds1; - dyn_string_t ds2; -{ - /* If DS1 and DS2 have different lengths, they must not be the same. */ - if (ds1->length != ds2->length) - return 0; - else - return !VG_(strcmp) (ds1->s, ds2->s); -} diff --git a/coregrind/demangle/dyn-string.h b/coregrind/demangle/dyn-string.h deleted file mode 100644 index 9615cd64ee..0000000000 --- a/coregrind/demangle/dyn-string.h +++ /dev/null @@ -1,96 +0,0 @@ -/* An abstract string datatype. - Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc. - Contributed by Mark Mitchell (mark@markmitchell.com). - -This file is part of GCC. - -GCC is free software; you can redistribute it and/or modify -it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 2, or (at your option) -any later version. - -GCC is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. - -You should have received a copy of the GNU General Public License -along with GCC; see the file COPYING. If not, write to -the Free Software Foundation, 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ -#ifndef __DYN_STRING_H -#define __DYN_STRING_H - - -typedef struct dyn_string -{ - int allocated; /* The amount of space allocated for the string. */ - int length; /* The actual length of the string. */ - char *s; /* The string itself, NUL-terminated. */ -}* dyn_string_t; - -/* The length STR, in bytes, not including the terminating NUL. */ -#define dyn_string_length(STR) \ - ((STR)->length) - -/* The NTBS in which the contents of STR are stored. */ -#define dyn_string_buf(STR) \ - ((STR)->s) - -/* Compare DS1 to DS2 with strcmp. 
*/ -#define dyn_string_compare(DS1, DS2) \ - (VG_(strcmp) ((DS1)->s, (DS2)->s)) - - -/* dyn_string functions are used in the demangling implementation - included in the G++ runtime library. To prevent collisions with - names in user programs, the functions that are used in the - demangler are given implementation-reserved names. */ - -#if 1 /* def IN_LIBGCC2 */ - -#define dyn_string_init VG_(__cxa_dyn_string_init) -#define dyn_string_new VG_(__cxa_dyn_string_new) -#define dyn_string_delete VG_(__cxa_dyn_string_delete) -#define dyn_string_release VG_(__cxa_dyn_string_release) -#define dyn_string_resize VG_(__cxa_dyn_string_resize) -#define dyn_string_clear VG_(__cxa_dyn_string_clear) -#define dyn_string_copy VG_(__cxa_dyn_string_copy) -#define dyn_string_copy_cstr VG_(__cxa_dyn_string_copy_cstr) -#define dyn_string_prepend VG_(__cxa_dyn_string_prepend) -#define dyn_string_prepend_cstr VG_(__cxa_dyn_string_prepend_cstr) -#define dyn_string_insert VG_(__cxa_dyn_string_insert) -#define dyn_string_insert_cstr VG_(__cxa_dyn_string_insert_cstr) -#define dyn_string_insert_char VG_(__cxa_dyn_string_insert_char) -#define dyn_string_append VG_(__cxa_dyn_string_append) -#define dyn_string_append_cstr VG_(__cxa_dyn_string_append_cstr) -#define dyn_string_append_char VG_(__cxa_dyn_string_append_char) -#define dyn_string_substring VG_(__cxa_dyn_string_substring) -#define dyn_string_eq VG_(__cxa_dyn_string_eq) - -#endif /* IN_LIBGCC2 */ - - -extern int dyn_string_init PARAMS ((struct dyn_string *, int)); -extern dyn_string_t dyn_string_new PARAMS ((int)); -extern void dyn_string_delete PARAMS ((dyn_string_t)); -extern char *dyn_string_release PARAMS ((dyn_string_t)); -extern dyn_string_t dyn_string_resize PARAMS ((dyn_string_t, int)); -extern void dyn_string_clear PARAMS ((dyn_string_t)); -extern int dyn_string_copy PARAMS ((dyn_string_t, dyn_string_t)); -extern int dyn_string_copy_cstr PARAMS ((dyn_string_t, const char *)); -extern int dyn_string_prepend PARAMS ((dyn_string_t, 
dyn_string_t)); -extern int dyn_string_prepend_cstr PARAMS ((dyn_string_t, const char *)); -extern int dyn_string_insert PARAMS ((dyn_string_t, int, - dyn_string_t)); -extern int dyn_string_insert_cstr PARAMS ((dyn_string_t, int, - const char *)); -extern int dyn_string_insert_char PARAMS ((dyn_string_t, int, int)); -extern int dyn_string_append PARAMS ((dyn_string_t, dyn_string_t)); -extern int dyn_string_append_cstr PARAMS ((dyn_string_t, const char *)); -extern int dyn_string_append_char PARAMS ((dyn_string_t, int)); -extern int dyn_string_substring PARAMS ((dyn_string_t, - dyn_string_t, int, int)); -extern int dyn_string_eq PARAMS ((dyn_string_t, dyn_string_t)); - -#endif diff --git a/coregrind/demangle/safe-ctype.c b/coregrind/demangle/safe-ctype.c deleted file mode 100644 index 0c2be3ed79..0000000000 --- a/coregrind/demangle/safe-ctype.c +++ /dev/null @@ -1,163 +0,0 @@ -/* replacement macros. - - Copyright (C) 2000 Free Software Foundation, Inc. - Contributed by Zack Weinberg . - -This file is part of the libiberty library. -Libiberty is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public -License as published by the Free Software Foundation; either -version 2 of the License, or (at your option) any later version. - -Libiberty is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -You should have received a copy of the GNU Library General Public -License along with libiberty; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -/* This is a compatible replacement of the standard C library's - with the following properties: - - - Implements all isxxx() macros required by C99. 
- - Also implements some character classes useful when - parsing C-like languages. - - Does not change behavior depending on the current locale. - - Behaves properly for all values in the range of a signed or - unsigned char. */ - -#include "ansidecl.h" -#include -#include /* for EOF */ - -/* Shorthand */ -#define bl _sch_isblank -#define cn _sch_iscntrl -#define di _sch_isdigit -#define is _sch_isidst -#define lo _sch_islower -#define nv _sch_isnvsp -#define pn _sch_ispunct -#define pr _sch_isprint -#define sp _sch_isspace -#define up _sch_isupper -#define vs _sch_isvsp -#define xd _sch_isxdigit - -/* Masks. */ -#define L lo|is |pr /* lower case letter */ -#define XL lo|is|xd|pr /* lowercase hex digit */ -#define U up|is |pr /* upper case letter */ -#define XU up|is|xd|pr /* uppercase hex digit */ -#define D di |xd|pr /* decimal digit */ -#define P pn |pr /* punctuation */ -#define _ pn|is |pr /* underscore */ - -#define C cn /* control character */ -#define Z nv |cn /* NUL */ -#define M nv|sp |cn /* cursor movement: \f \v */ -#define V vs|sp |cn /* vertical space: \r \n */ -#define T nv|sp|bl|cn /* tab */ -#define S nv|sp|bl|pr /* space */ - -/* Are we ASCII? */ -#if '\n' == 0x0A && ' ' == 0x20 && '0' == 0x30 \ - && 'A' == 0x41 && 'a' == 0x61 && '!' == 0x21 \ - && EOF == -1 - -const unsigned short _sch_istable[256] = -{ - Z, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */ - C, T, V, M, M, V, C, C, /* BS HT LF VT FF CR SO SI */ - C, C, C, C, C, C, C, C, /* DLE DC1 DC2 DC3 DC4 NAK SYN ETB */ - C, C, C, C, C, C, C, C, /* CAN EM SUB ESC FS GS RS US */ - S, P, P, P, P, P, P, P, /* SP ! " # $ % & ' */ - P, P, P, P, P, P, P, P, /* ( ) * + , - . / */ - D, D, D, D, D, D, D, D, /* 0 1 2 3 4 5 6 7 */ - D, D, P, P, P, P, P, P, /* 8 9 : ; < = > ? 
*/ - P, XU, XU, XU, XU, XU, XU, U, /* @ A B C D E F G */ - U, U, U, U, U, U, U, U, /* H I J K L M N O */ - U, U, U, U, U, U, U, U, /* P Q R S T U V W */ - U, U, U, P, P, P, P, _, /* X Y Z [ \ ] ^ _ */ - P, XL, XL, XL, XL, XL, XL, L, /* ` a b c d e f g */ - L, L, L, L, L, L, L, L, /* h i j k l m n o */ - L, L, L, L, L, L, L, L, /* p q r s t u v w */ - L, L, L, P, P, P, P, C, /* x y z { | } ~ DEL */ - - /* high half of unsigned char is locale-specific, so all tests are - false in "C" locale */ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -}; - -const unsigned char _sch_tolower[256] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, - - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - - 91, 92, 93, 94, 95, 96, - - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - - 123,124,125,126,127, - - 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, - 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, - 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, - 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, - - 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, - 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, - 224,225,226,227, 228,229,230,231, 
232,233,234,235, 236,237,238,239, - 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, -}; - -const unsigned char _sch_toupper[256] = -{ - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, - 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, - 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, - 64, - - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - - 91, 92, 93, 94, 95, 96, - - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - - 123,124,125,126,127, - - 128,129,130,131, 132,133,134,135, 136,137,138,139, 140,141,142,143, - 144,145,146,147, 148,149,150,151, 152,153,154,155, 156,157,158,159, - 160,161,162,163, 164,165,166,167, 168,169,170,171, 172,173,174,175, - 176,177,178,179, 180,181,182,183, 184,185,186,187, 188,189,190,191, - - 192,193,194,195, 196,197,198,199, 200,201,202,203, 204,205,206,207, - 208,209,210,211, 212,213,214,215, 216,217,218,219, 220,221,222,223, - 224,225,226,227, 228,229,230,231, 232,233,234,235, 236,237,238,239, - 240,241,242,243, 244,245,246,247, 248,249,250,251, 252,253,254,255, -}; - -#else - #error "Unsupported host character set" -#endif /* not ASCII */ diff --git a/coregrind/demangle/safe-ctype.h b/coregrind/demangle/safe-ctype.h deleted file mode 100644 index b2ad8490bd..0000000000 --- a/coregrind/demangle/safe-ctype.h +++ /dev/null @@ -1,103 +0,0 @@ -/* replacement macros. - - Copyright (C) 2000, 2001 Free Software Foundation, Inc. - Contributed by Zack Weinberg . - -This file is part of the libiberty library. -Libiberty is free software; you can redistribute it and/or -modify it under the terms of the GNU Library General Public -License as published by the Free Software Foundation; either -version 2 of the License, or (at your option) any later version. 
- -Libiberty is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Library General Public License for more details. - -You should have received a copy of the GNU Library General Public -License along with libiberty; see the file COPYING.LIB. If -not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, -Boston, MA 02111-1307, USA. */ - -/* This is a compatible replacement of the standard C library's - with the following properties: - - - Implements all isxxx() macros required by C99. - - Also implements some character classes useful when - parsing C-like languages. - - Does not change behavior depending on the current locale. - - Behaves properly for all values in the range of a signed or - unsigned char. - - To avoid conflicts, this header defines the isxxx functions in upper - case, e.g. ISALPHA not isalpha. */ - -#ifndef SAFE_CTYPE_H -#define SAFE_CTYPE_H - -#ifdef isalpha - #error "safe-ctype.h and ctype.h may not be used simultaneously" -#else - -/* Categories. */ - -enum { - /* In C99 */ - _sch_isblank = 0x0001, /* space \t */ - _sch_iscntrl = 0x0002, /* nonprinting characters */ - _sch_isdigit = 0x0004, /* 0-9 */ - _sch_islower = 0x0008, /* a-z */ - _sch_isprint = 0x0010, /* any printing character including ' ' */ - _sch_ispunct = 0x0020, /* all punctuation */ - _sch_isspace = 0x0040, /* space \t \n \r \f \v */ - _sch_isupper = 0x0080, /* A-Z */ - _sch_isxdigit = 0x0100, /* 0-9A-Fa-f */ - - /* Extra categories useful to cpplib. */ - _sch_isidst = 0x0200, /* A-Za-z_ */ - _sch_isvsp = 0x0400, /* \n \r */ - _sch_isnvsp = 0x0800, /* space \t \f \v \0 */ - - /* Combinations of the above. 
*/ - _sch_isalpha = _sch_isupper|_sch_islower, /* A-Za-z */ - _sch_isalnum = _sch_isalpha|_sch_isdigit, /* A-Za-z0-9 */ - _sch_isidnum = _sch_isidst|_sch_isdigit, /* A-Za-z0-9_ */ - _sch_isgraph = _sch_isalnum|_sch_ispunct, /* isprint and not space */ - _sch_iscppsp = _sch_isvsp|_sch_isnvsp, /* isspace + \0 */ - _sch_isbasic = _sch_isprint|_sch_iscppsp /* basic charset of ISO C - (plus ` and @) */ -}; - -/* Character classification. */ -extern const unsigned short _sch_istable[256]; - -#define _sch_test(c, bit) (_sch_istable[(c) & 0xff] & (unsigned short)(bit)) - -#define ISALPHA(c) _sch_test(c, _sch_isalpha) -#define ISALNUM(c) _sch_test(c, _sch_isalnum) -#define ISBLANK(c) _sch_test(c, _sch_isblank) -#define ISCNTRL(c) _sch_test(c, _sch_iscntrl) -#define ISDIGIT(c) _sch_test(c, _sch_isdigit) -#define ISGRAPH(c) _sch_test(c, _sch_isgraph) -#define ISLOWER(c) _sch_test(c, _sch_islower) -#define ISPRINT(c) _sch_test(c, _sch_isprint) -#define ISPUNCT(c) _sch_test(c, _sch_ispunct) -#define ISSPACE(c) _sch_test(c, _sch_isspace) -#define ISUPPER(c) _sch_test(c, _sch_isupper) -#define ISXDIGIT(c) _sch_test(c, _sch_isxdigit) - -#define ISIDNUM(c) _sch_test(c, _sch_isidnum) -#define ISIDST(c) _sch_test(c, _sch_isidst) -#define IS_ISOBASIC(c) _sch_test(c, _sch_isbasic) -#define IS_VSPACE(c) _sch_test(c, _sch_isvsp) -#define IS_NVSPACE(c) _sch_test(c, _sch_isnvsp) -#define IS_SPACE_OR_NUL(c) _sch_test(c, _sch_iscppsp) - -/* Character transformation. 
*/ -extern const unsigned char _sch_toupper[256]; -extern const unsigned char _sch_tolower[256]; -#define TOUPPER(c) _sch_toupper[(c) & 0xff] -#define TOLOWER(c) _sch_tolower[(c) & 0xff] - -#endif /* no ctype.h */ -#endif /* SAFE_CTYPE_H */ diff --git a/coregrind/docs/Makefile.am b/coregrind/docs/Makefile.am deleted file mode 100644 index e8a58fa18e..0000000000 --- a/coregrind/docs/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -docdir = $(datadir)/doc/valgrind - -doc_DATA = index.html manual.html nav.html techdocs.html - -EXTRA_DIST = $(doc_DATA) diff --git a/coregrind/docs/index.html b/coregrind/docs/index.html deleted file mode 100644 index 1111702565..0000000000 --- a/coregrind/docs/index.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - Valgrind's user manual - - - - - - - <body> - <p>This page uses frames, but your browser doesn't support them.</p> - </body> - - - - diff --git a/coregrind/docs/manual.html b/coregrind/docs/manual.html deleted file mode 100644 index b715ee3dfe..0000000000 --- a/coregrind/docs/manual.html +++ /dev/null @@ -1,2702 +0,0 @@ - - - - Valgrind - - - - -  -

Valgrind, version 1.0.0

-
This manual was last updated on 20020726
-

- -

-jseward@acm.org
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -Linux-x86 executables. -

- -

- -


- -

Contents of this manual

- -

Introduction

- 1.1  What Valgrind is for
- 1.2  What it does with your program - -

How to use it, and how to make sense - of the results

- 2.1  Getting started
- 2.2  The commentary
- 2.3  Reporting of errors
- 2.4  Suppressing errors
- 2.5  Command-line flags
- 2.6  Explanation of error messages
- 2.7  Writing suppressions files
- 2.8  The Client Request mechanism
- 2.9  Support for POSIX pthreads
- 2.10  Building and installing
- 2.11  If you have problems
- -

Details of the checking machinery

- 3.1  Valid-value (V) bits
- 3.2  Valid-address (A) bits
- 3.3  Putting it all together
- 3.4  Signals
- 3.5  Memory leak detection
- -

Limitations

- -

How it works -- a rough overview

- 5.1  Getting started
- 5.2  The translation/instrumentation engine
- 5.3  Tracking the status of memory
- 5.4  System calls
- 5.5  Signals
- -

An example

- -

Cache profiling

- -

The design and implementation of Valgrind

- -
- - -

1  Introduction

- - -

1.1  What Valgrind is for

- -Valgrind is a tool to help you find memory-management problems in your -programs. When a program is run under Valgrind's supervision, all -reads and writes of memory are checked, and calls to -malloc/new/free/delete are intercepted. As a result, Valgrind can -detect problems such as: -
    -
  • Use of uninitialised memory
  • -
  • Reading/writing memory after it has been free'd
  • -
  • Reading/writing off the end of malloc'd blocks
  • -
  • Reading/writing inappropriate areas on the stack
  • -
  • Memory leaks -- where pointers to malloc'd blocks are lost - forever
  • -
  • Mismatched use of malloc/new/new [] vs free/delete/delete - []
  • -
  • Some misuses of the POSIX pthreads API
  • -
- -Problems like these can be difficult to find by other means, often -lying undetected for long periods, then causing occasional, -difficult-to-diagnose crashes. - -

-Valgrind is closely tied to details of the CPU, operating system and -to a lesser extent, compiler and basic C libraries. This makes it -difficult to make it portable, so I have chosen at the outset to -concentrate on what I believe to be a widely used platform: Linux on -x86s. Valgrind uses the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. This should cover the vast majority of -modern Linux installations. - - -

-Valgrind is licensed under the GNU General Public License, version -2. Read the file LICENSE in the source distribution for details. Some -of the PThreads test cases, test/pth_*.c, are taken from -"Pthreads Programming" by Bradford Nichols, Dick Buttlar & Jacqueline -Proulx Farrell, ISBN 1-56592-115-1, published by O'Reilly & -Associates, Inc. - - - -

1.2  What it does with your program

- -Valgrind is designed to be as non-intrusive as possible. It works -directly with existing executables. You don't need to recompile, -relink, or otherwise modify, the program to be checked. Simply place -the word valgrind at the start of the command line -normally used to run the program. So, for example, if you want to run -the command ls -l on Valgrind, simply issue the -command: valgrind ls -l. - -

Valgrind takes control of your program before it starts. Debugging -information is read from the executable and associated libraries, so -that error messages can be phrased in terms of source code -locations. Your program is then run on a synthetic x86 CPU which -checks every memory access. All detected errors are written to a -log. When the program finishes, Valgrind searches for and reports on -leaked memory. - -

You can run pretty much any dynamically linked ELF x86 executable -using Valgrind. Programs run 25 to 50 times slower, and take a lot -more memory, than they usually would. It works well enough to run -large programs. For example, the Konqueror web browser from the KDE -Desktop Environment, version 3.0, runs slowly but usably on Valgrind. - -

Valgrind simulates every single instruction your program executes. -Because of this, it finds errors not only in your application but also -in all supporting dynamically-linked (.so-format) -libraries, including the GNU C library, the X client libraries, Qt, if -you work with KDE, and so on. That often includes libraries, for -example the GNU C library, which contain memory access violations, but -which you cannot or do not want to fix. - -

Rather than swamping you with errors in which you are not -interested, Valgrind allows you to selectively suppress errors, by -recording them in a suppressions file which is read when Valgrind -starts up. The build mechanism attempts to select suppressions which -give reasonable behaviour for the libc and XFree86 versions detected -on your machine. - - -

Section 6 shows an example of use. -

-


- - -

2  How to use it, and how to make sense of the results

- - -

2.1  Getting started

- -First off, consider whether it might be beneficial to recompile your -application and supporting libraries with optimisation disabled and -debugging info enabled (the -g flag). You don't have to -do this, but doing so helps Valgrind produce more accurate and less -confusing error reports. Chances are you're set up like this already, -if you intended to debug your program with GNU gdb, or some other -debugger. - -

-A plausible compromise is to use -g -O. -Optimisation levels above -O have been observed, on very -rare occasions, to cause gcc to generate code which fools Valgrind's -error tracking machinery into wrongly reporting uninitialised value -errors. -O gets you the vast majority of the benefits of -higher optimisation levels anyway, so you don't lose much there. - -

-Valgrind understands both the older "stabs" debugging format, used by -gcc versions prior to 3.1, and the newer DWARF2 format used by gcc 3.1 -and later. - -

-Then just run your application, but place the word -valgrind in front of your usual command-line invocation. -Note that you should run the real (machine-code) executable here. If -your application is started by, for example, a shell or perl script, -you'll need to modify it to invoke Valgrind on the real executables. -Running such scripts directly under Valgrind will result in you -getting error reports pertaining to /bin/sh, -/usr/bin/perl, or whatever interpreter you're using. -This almost certainly isn't what you want and can be confusing. - - -

2.2  The commentary

- -Valgrind writes a commentary, detailing error reports and other -significant events. The commentary goes to standard output by -default. This may interfere with your program, so you can ask for it -to be directed elsewhere. - -

All lines in the commentary are of the following form:
-

-  ==12345== some-message-from-Valgrind
-
-

The 12345 is the process ID. This scheme makes it easy -to distinguish program output from Valgrind commentary, and also easy -to differentiate commentaries from different processes which have -become merged together, for whatever reason. - -

By default, Valgrind writes only essential messages to the commentary, -so as to avoid flooding you with information of secondary importance. -If you want more information about what is happening, re-run, passing -the -v flag to Valgrind. - - - -

2.3  Reporting of errors

- -When Valgrind detects something bad happening in the program, an error -message is written to the commentary. For example:
-
-  ==25832== Invalid read of size 4
-  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
-  ==25832==    by 0x80487AF: main (bogon.cpp:66)
-  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-
- -

This message says that the program did an illegal 4-byte read of -address 0xBFFFF74C, which, as far as it can tell, is not a valid stack -address, nor corresponds to any currently malloc'd or free'd blocks. -The read is happening at line 45 of bogon.cpp, called -from line 66 of the same file, etc. For errors associated with an -identified malloc'd/free'd block, for example reading free'd memory, -Valgrind reports not only the location where the error happened, but -also where the associated block was malloc'd/free'd. - -

Valgrind remembers all error reports. When an error is detected, -it is compared against old reports, to see if it is a duplicate. If -so, the error is noted, but no further commentary is emitted. This -avoids you being swamped with bazillions of duplicate error reports. - -

If you want to know how many times each error occurred, run with -the -v option. When execution finishes, all the reports -are printed out, along with, and sorted by, their occurrence counts. -This makes it easy to see which errors have occurred most frequently. - -

Errors are reported before the associated operation actually -happens. For example, if your program decides to read from address -zero, Valgrind will emit a message to this effect, and the program -will then duly die with a segmentation fault. - -

In general, you should try and fix errors in the order that they -are reported. Not doing so can be confusing. For example, a program -which copies uninitialised values to several memory locations, and -later uses them, will generate several error messages. The first such -error message may well give the most direct clue to the root cause of -the problem. - -

The process of detecting duplicate errors is quite an expensive -one and can become a significant performance overhead if your program -generates huge quantities of errors. To avoid serious problems here, -Valgrind will simply stop collecting errors after 300 different errors -have been seen, or 30000 errors in total have been seen. In this -situation you might as well stop your program and fix it, because -Valgrind won't tell you anything else useful after this. Note that -the 300/30000 limits apply after suppressed errors are removed. These -limits are defined in vg_include.h and can be increased -if necessary. - -

To avoid this cutoff you can use the ---error-limit=no flag. Then valgrind will always show -errors, regardless of how many there are. Use this flag carefully, -since it may have a dire effect on performance. - - - -

2.4  Suppressing errors

- -Valgrind detects numerous problems in the base libraries, such as the -GNU C library, and the XFree86 client libraries, which come -pre-installed on your GNU/Linux system. You can't easily fix these, -but you don't want to see these errors (and yes, there are many!) So -Valgrind reads a list of errors to suppress at startup. -A default suppression file is cooked up by the -./configure script. - -

You can modify and add to the suppressions file at your leisure, -or, better, write your own. Multiple suppression files are allowed. -This is useful if part of your project contains errors you can't or -don't want to fix, yet you don't want to continuously be reminded of -them. - -

Each error to be suppressed is described very specifically, to -minimise the possibility that a suppression-directive inadvertently -suppresses a bunch of similar errors which you did want to see. The -suppression mechanism is designed to allow precise yet flexible -specification of errors to suppress. - -

If you use the -v flag, at the end of execution, Valgrind -prints out one line for each used suppression, giving its name and the -number of times it got used. Here's the suppressions used by a run of -ls -l: -

-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
-  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
-
- - -

2.5  Command-line flags

- -You invoke Valgrind like this: -
-  valgrind [options-for-Valgrind] your-prog [options for your-prog]
-
- -

Note that Valgrind also reads options from the environment variable -$VALGRIND, and processes them before the command-line -options. - -

Valgrind's default settings succeed in giving reasonable behaviour -in most cases. Available options, in no particular order, are as -follows: -

    -
  • --help

  • - -
  • --version
    -

    The usual deal.


  • - -

  • -v --verbose
    -

    Be more verbose. Gives extra information on various aspects - of your program, such as: the shared objects loaded, the - suppressions used, the progress of the instrumentation engine, - and warnings about unusual behaviour. -


  • - -

  • -q --quiet
    -

    Run silently, and only print error messages. Useful if you - are running regression tests or have some other automated test - machinery. -


  • - -

  • --demangle=no
    - --demangle=yes [the default] -

    Disable/enable automatic demangling (decoding) of C++ names. - Enabled by default. When enabled, Valgrind will attempt to - translate encoded C++ procedure names back to something - approaching the original. The demangler handles symbols mangled - by g++ versions 2.X and 3.X. - -

    An important fact about demangling is that function - names mentioned in suppressions files should be in their mangled - form. Valgrind does not demangle function names when searching - for applicable suppressions, because to do otherwise would make - suppressions file contents dependent on the state of Valgrind's - demangling machinery, and would also be slow and pointless. -


  • - -

  • --num-callers=<number> [default=4]
    -

    By default, Valgrind shows four levels of function call names - to help you identify program locations. You can change that - number with this option. This can help in determining the - program's location in deeply-nested call chains. Note that errors - are commoned up using only the top three function locations (the - place in the current function, and that of its two immediate - callers). So this doesn't affect the total number of errors - reported. -

    - The maximum value for this is 50. Note that higher settings - will make Valgrind run a bit more slowly and take a bit more - memory, but can be useful when working with programs with - deeply-nested call chains. -


  • - -

  • --gdb-attach=no [the default]
    - --gdb-attach=yes -

    When enabled, Valgrind will pause after every error shown, - and print the line -
    - ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- -

    - Pressing Ret, or N Ret - or n Ret, causes Valgrind not to - start GDB for this error. -

    - Y Ret - or y Ret causes Valgrind to - start GDB, for the program at this point. When you have - finished with GDB, quit from it, and the program will continue. - Trying to continue from inside GDB doesn't work. -

    - C Ret - or c Ret causes Valgrind not to - start GDB, and not to ask again. -

    - --gdb-attach=yes conflicts with - --trace-children=yes. You can't use them together. - Valgrind refuses to start up in this situation. 1 May 2002: - this is a historical relic which could be easily fixed if it - gets in your way. Mail me and complain if this is a problem for - you.


  • - -

  • --partial-loads-ok=yes [the default]
    - --partial-loads-ok=no -

    Controls how Valgrind handles word (4-byte) loads from - addresses for which some bytes are addressable and others - are not. When yes (the default), such loads - do not elicit an address error. Instead, the loaded V bytes - corresponding to the illegal addresses indicate undefined, and - those corresponding to legal addresses are loaded from shadow - memory, as usual. -

    - When no, loads from partially - invalid addresses are treated the same as loads from completely - invalid addresses: an illegal-address error is issued, - and the resulting V bytes indicate valid data. -


  • - -

  • --sloppy-malloc=no [the default]
    - --sloppy-malloc=yes -

    When enabled, all requests for malloc/calloc are rounded up - to a whole number of machine words -- in other words, made - divisible by 4. For example, a request for 17 bytes of space - would result in a 20-byte area being made available. This works - around bugs in sloppy libraries which assume that they can - safely rely on malloc/calloc requests being rounded up in this - fashion. Without the workaround, these libraries tend to - generate large numbers of errors when they access the ends of - these areas. -

    - Valgrind snapshots dated 17 Feb 2002 and later are - cleverer about this problem, and you should no longer need to - use this flag. To put it bluntly, if you do need to use this - flag, your program violates the ANSI C semantics defined for - malloc and free, even if it appears to - work correctly, and you should fix it, at least if you hope for - maximum portability. -


  • - -

  • --alignment=<number> [default: 4]

    By - default valgrind's malloc, realloc, - etc, return 4-byte aligned addresses. These are suitable for - any accesses on x86 processors. - Some programs might however assume that malloc et - al return 8- or more aligned memory. - These programs are broken and should be fixed, but - if this is impossible for whatever reason the alignment can be - increased using this parameter. The supplied value must be - between 4 and 4096 inclusive, and must be a power of two.


  • - -

  • --trace-children=no [the default]
    - --trace-children=yes -

    When enabled, Valgrind will trace into child processes. This - is confusing and usually not what you want, so is disabled by - default. As of 1 May 2002, tracing into a child process from a - parent which uses libpthread.so is probably broken - and is likely to cause breakage. Please report any such - problems to me.


  • - -

  • --freelist-vol=<number> [default: 1000000] -

    When the client program releases memory using free (in C) or - delete (C++), that memory is not immediately made available for - re-allocation. Instead it is marked inaccessible and placed in - a queue of freed blocks. The purpose is to delay the point at - which freed-up memory comes back into circulation. This - increases the chance that Valgrind will be able to detect - invalid accesses to blocks for some significant period of time - after they have been freed. -

    - This flag specifies the maximum total size, in bytes, of the - blocks in the queue. The default value is one million bytes. - Increasing this increases the total amount of memory used by - Valgrind but may detect invalid uses of freed blocks which would - otherwise go undetected.


  • - -

  • --logfile-fd=<number> [default: 2, stderr] -

    Specifies the file descriptor on which Valgrind communicates - all of its messages. The default, 2, is the standard error - channel. This may interfere with the client's own use of - stderr. To dump Valgrind's commentary in a file without using - stderr, something like the following works well (sh/bash - syntax):
    -    - valgrind --logfile-fd=9 my_prog 9> logfile
    - That is: tell Valgrind to send all output to file descriptor 9, - and ask the shell to route file descriptor 9 to "logfile". -


  • - -

  • --suppressions=<filename> - [default: $PREFIX/lib/valgrind/default.supp] -

    Specifies an extra - file from which to read descriptions of errors to suppress. You - may use as many extra suppressions files as you - like.


  • - -

  • --leak-check=no [default]
    - --leak-check=yes -

    When enabled, search for memory leaks when the client program - finishes. A memory leak means a malloc'd block, which has not - yet been free'd, but to which no pointer can be found. Such a - block can never be free'd by the program, since no pointer to it - exists. Leak checking is disabled by default because it tends - to generate dozens of error messages.


  • - -

  • --show-reachable=no [default]
    - --show-reachable=yes -

    When disabled, the memory leak detector only shows blocks for - which it cannot find a pointer to at all, or it can only find a - pointer to the middle of. These blocks are prime candidates for - memory leaks. When enabled, the leak detector also reports on - blocks which it could find a pointer to. Your program could, at - least in principle, have freed such blocks before exit. - Contrast this to blocks for which no pointer, or only an - interior pointer could be found: they are more likely to - indicate memory leaks, because you do not actually have a - pointer to the start of the block which you can hand to - free, even if you wanted to.


  • - -

  • --leak-resolution=low [default]
    - --leak-resolution=med
    - --leak-resolution=high -

    When doing leak checking, determines how willing Valgrind is - to consider different backtraces to be the same. When set to - low, the default, only the first two entries need - match. When med, four entries have to match. When - high, all entries need to match. -

    - For hardcore leak debugging, you probably want to use - --leak-resolution=high together with - --num-callers=40 or some such large number. Note - however that this can give an overwhelming amount of - information, which is why the defaults are 4 callers and - low-resolution matching. -

    - Note that the --leak-resolution= setting does not - affect Valgrind's ability to find leaks. It only changes how - the results are presented. -


  • - -

  • --workaround-gcc296-bugs=no [default]
    - --workaround-gcc296-bugs=yes

    When enabled, - assumes that reads and writes some small distance below the stack - pointer %esp are due to bugs in gcc 2.96, and does - not report them. The "small distance" is 256 bytes by default. - Note that gcc 2.96 is the default compiler on some popular Linux - distributions (RedHat 7.X, Mandrake) and so you may well need to - use this flag. Do not use it if you do not have to, as it can - cause real errors to be overlooked. Another option is to use a - gcc/g++ which does not generate accesses below the stack - pointer. 2.95.3 seems to be a good choice in this respect. -

    - Unfortunately (27 Feb 02) it looks like g++ 3.0.4 has a similar - bug, so you may need to issue this flag if you use 3.0.4. A - while later (early Apr 02) this is confirmed as a scheduling bug - in g++-3.0.4. -


  • - -

  • --error-limit=yes [default]
    - --error-limit=no

    When enabled, valgrind stops - reporting errors after 30000 in total, or 300 different ones, - have been seen. This is to stop the error tracking machinery - from becoming a huge performance overhead in programs with many - errors.


  • - -

  • --cachesim=no [default]
    - --cachesim=yes

    When enabled, turns off memory - checking, and turns on cache profiling. Cache profiling is - described in detail in Section 7. -


  • - -

  • --weird-hacks=hack1,hack2,... - Pass miscellaneous hints to Valgrind which slightly modify the - simulated behaviour in nonstandard or dangerous ways, possibly - to help the simulation of strange features. By default no hacks - are enabled. Use with caution! Currently known hacks are: -

    -

      -
    • ioctl-VTIME Use this if you have a program - which sets readable file descriptors to have a timeout by - doing ioctl on them with a - TCSETA-style command and a non-zero - VTIME timeout value. This is considered - potentially dangerous and therefore is not engaged by - default, because it is (remotely) conceivable that it could - cause threads doing read to incorrectly block - the entire process. -

      - You probably want to try this one if you have a program - which unexpectedly blocks in a read from a file - descriptor which you know to have been messed with by - ioctl. This could happen, for example, if the - descriptor is used to read input from some kind of screen - handling library. -

      - To find out if your program is blocking unexpectedly in the - read system call, run with the - --trace-syscalls=yes flag. -

      -

    • truncate-writes Use this if you have a threaded - program which appears to unexpectedly block whilst writing - into a pipe. The effect is to modify all calls to - write() so that requests to write more than - 4096 bytes are treated as if they only requested a write of - 4096 bytes. Valgrind does this by changing the - count argument of write(), as - passed to the kernel, so that it is at most 4096. The - amount of data written will then be less than the client - program asked for, but the client should have a loop around - its write() call to check whether the requested - number of bytes have been written. If not, it should issue - further write() calls until all the data is - written. -

      - This all sounds pretty dodgy to me, which is why I've made - this behaviour only happen on request. It is not the - default behaviour. At the time of writing this (30 June - 2002) I have only seen one example where this is necessary, - so either the problem is extremely rare or nobody is using - Valgrind :-) -

      - On experimentation I see that truncate-writes - doesn't interact well with ioctl-VTIME, so you - probably don't want to try both at once. -

      - As above, to find out if your program is blocking - unexpectedly in the write() system call, you - may find the --trace-syscalls=yes - --trace-sched=yes flags useful. -

    - -
  • -

- -There are also some options for debugging Valgrind itself. You -shouldn't need to use them in the normal run of things. Nevertheless: - -
    - -
  • --single-step=no [default]
    - --single-step=yes -

    When enabled, each x86 insn is translated separately into - instrumented code. When disabled, translation is done on a - per-basic-block basis, giving much better translations.


  • -

    - -

  • --optimise=no
    - --optimise=yes [default] -

    When enabled, various improvements are applied to the - intermediate code, mainly aimed at allowing the simulated CPU's - registers to be cached in the real CPU's registers over several - simulated instructions.


  • -

    - -

  • --instrument=no
    - --instrument=yes [default] -

    When disabled, the translations don't actually contain any - instrumentation.


  • -

    - -

  • --cleanup=no
    - --cleanup=yes [default] -

    When enabled, various improvements are applied to the - post-instrumented intermediate code, aimed at removing redundant - value checks.


  • -

    - -

  • --trace-syscalls=no [default]
    - --trace-syscalls=yes -

    Enable/disable tracing of system call intercepts.


  • -

    - -

  • --trace-signals=no [default]
    - --trace-signals=yes -

    Enable/disable tracing of signal handling.


  • -

    - -

  • --trace-sched=no [default]
    - --trace-sched=yes -

    Enable/disable tracing of thread scheduling events.


  • -

    - -

  • --trace-pthread=none [default]
    - --trace-pthread=some
    - --trace-pthread=all -

    Specifies amount of trace detail for pthread-related events.


  • -

    - -

  • --trace-symtab=no [default]
    - --trace-symtab=yes -

    Enable/disable tracing of symbol table reading.


  • -

    - -

  • --trace-malloc=no [default]
    - --trace-malloc=yes -

    Enable/disable tracing of malloc/free (et al) intercepts. -


  • -

    - -

  • --stop-after=<number> - [default: infinity, more or less] -

    After <number> basic blocks have been executed, shut down - Valgrind and switch back to running the client on the real CPU. -


  • -

    - -

  • --dump-error=<number> [default: inactive] -

    After the program has exited, show gory details of the - translation of the basic block containing the <number>'th - error context. When used with --single-step=yes, - can show the exact x86 instruction causing an error. This is - all fairly dodgy and doesn't work at all if threads are - involved.


  • -

    -

- - - -

2.6  Explanation of error messages

- -Despite considerable sophistication under the hood, Valgrind can only -really detect two kinds of errors, use of illegal addresses, and use -of undefined values. Nevertheless, this is enough to help you -discover all sorts of memory-management nasties in your code. This -section presents a quick summary of what error messages mean. The -precise behaviour of the error-checking machinery is described in -Section 4. - - -

2.6.1  Illegal read / Illegal write errors

-For example: -
-  Invalid read of size 4
-     at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
-     by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
-     Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
-
- -

This happens when your program reads or writes memory at a place -which Valgrind reckons it shouldn't. In this example, the program did -a 4-byte read at address 0xBFFFF0E0, somewhere within the -system-supplied library libpng.so.2.1.0.9, which was called from -somewhere else in the same library, called from line 326 of -qpngio.cpp, and so on. - -

Valgrind tries to establish what the illegal address might relate -to, since that's often useful. So, if it points into a block of -memory which has already been freed, you'll be informed of this, and -also where the block was free'd at. Likewise, if it should turn out -to be just off the end of a malloc'd block, a common result of -off-by-one-errors in array subscripting, you'll be informed of this -fact, and also where the block was malloc'd. - -

In this example, Valgrind can't identify the address. Actually the -address is on the stack, but, for some reason, this is not a valid -stack address -- it is below the stack pointer, %esp, and that isn't -allowed. In this particular case it's probably caused by gcc -generating invalid code, a known bug in various flavours of gcc. - -

Note that Valgrind only tells you that your program is about to -access memory at an illegal address. It can't stop the access from -happening. So, if your program makes an access which normally would -result in a segmentation fault, your program will still suffer the same -fate -- but you will get a message from Valgrind immediately prior to -this. In this particular example, reading junk on the stack is -non-fatal, and the program stays alive. - - -

2.6.2  Use of uninitialised values

-For example: -
-  Conditional jump or move depends on uninitialised value(s)
-     at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
-     by 0x402E8476: _IO_printf (printf.c:36)
-     by 0x8048472: main (tests/manuel1.c:8)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-
- -

An uninitialised-value use error is reported when your program uses -a value which hasn't been initialised -- in other words, is undefined. -Here, the undefined value is used somewhere inside the printf() -machinery of the C library. This error was reported when running the -following small program: -

-  int main()
-  {
-    int x;
-    printf ("x = %d\n", x);
-  }
-
- -

It is important to understand that your program can copy around -junk (uninitialised) data to its heart's content. Valgrind observes -this and keeps track of the data, but does not complain. A complaint -is issued only when your program attempts to make use of uninitialised -data. In this example, x is uninitialised. Valgrind observes the -value being passed to _IO_printf and thence to _IO_vfprintf, but makes -no comment. However, _IO_vfprintf has to examine the value of x so it -can turn it into the corresponding ASCII string, and it is at this -point that Valgrind complains. - -

Sources of uninitialised data tend to be: -

    -
  • Local variables in procedures which have not been initialised, - as in the example above.

  • - -

  • The contents of malloc'd blocks, before you write something - there. In C++, the new operator is a wrapper round malloc, so - if you create an object with new, its fields will be - uninitialised until you fill them in, which is only Right and - Proper.
  • -
- - - -

2.6.3  Illegal frees

-For example: -
-  Invalid free()
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-     Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-
-

Valgrind keeps track of the blocks allocated by your program with -malloc/new, so it can know exactly whether or not the argument to -free/delete is legitimate. Here, this test program has -freed the same block twice. As with the illegal read/write errors, -Valgrind attempts to make sense of the address free'd. If, as -here, the address is one which has previously been freed, you will -be told that -- making duplicate frees of the same block easy to spot. - - -

2.6.4  When a block is freed with an inappropriate -deallocation function

-In the following example, a block allocated with new[] -has wrongly been deallocated with free: -
-  Mismatched free() / delete / delete []
-     at 0x40043249: free (vg_clientfuncs.c:171)
-     by 0x4102BB4E: QGArray::~QGArray(void) (tools/qgarray.cpp:149)
-     by 0x4C261C41: PptDoc::~PptDoc(void) (include/qmemarray.h:60)
-     by 0x4C261F0E: PptXml::~PptXml(void) (pptxml.cc:44)
-     Address 0x4BB292A8 is 0 bytes inside a block of size 64 alloc'd
-     at 0x4004318C: __builtin_vec_new (vg_clientfuncs.c:152)
-     by 0x4C21BC15: KLaola::readSBStream(int) const (klaola.cc:314)
-     by 0x4C21C155: KLaola::stream(KLaola::OLENode const *) (klaola.cc:416)
-     by 0x4C21788F: OLEFilter::convert(QCString const &) (olefilter.cc:272)
-
-The following was told to me by the KDE 3 developers. I didn't know -any of it myself. They also implemented the check itself. -

-In C++ it's important to deallocate memory in a way compatible with -how it was allocated. The deal is: -

    -
  • If allocated with malloc, calloc, - realloc, valloc or - memalign, you must deallocate with free. -
  • If allocated with new[], you must deallocate with - delete[]. -
  • If allocated with new, you must deallocate with - delete. -
-The worst thing is that on Linux apparently it doesn't matter if you -do muddle these up, and it all seems to work ok, but the same program -may then crash on a different platform, Solaris for example. So it's -best to fix it properly. According to the KDE folks "it's amazing how -many C++ programmers don't know this". -

-Pascal Massimino adds the following clarification: -delete[] must be called associated with a -new[] because the compiler stores the size of the array -and the pointer-to-member to the destructor of the array's content -just before the pointer actually returned. This implies a -variable-sized overhead in what's returned by new or -new[]. It is rather surprising how compilers [Ed: -runtime-support libraries?] are robust to mismatch in -new/delete -new[]/delete[]. - - -

2.6.5  Passing system call parameters with inadequate -read/write permissions

- -Valgrind checks all parameters to system calls. If a system call -needs to read from a buffer provided by your program, Valgrind checks -that the entire buffer is addressable and has valid data, ie, it is -readable. And if the system call needs to write to a user-supplied -buffer, Valgrind checks that the buffer is addressable. After the -system call, Valgrind updates its administrative information to -precisely reflect any changes in memory permissions caused by the -system call. - -

Here's an example of a system call with an invalid parameter: -

-  #include <stdlib.h>
-  #include <unistd.h>
-  int main( void )
-  {
-    char* arr = malloc(10);
-    (void) write( 1 /* stdout */, arr, 10 );
-    return 0;
-  }
-
- -

You get this complaint ... -

-  Syscall param write(buf) contains uninitialised or unaddressable byte(s)
-     at 0x4035E072: __libc_write
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-     by <bogus frame pointer> ???
-     Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
-     at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
-     by 0x80484A0: main (tests/badwrite.c:6)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-
- -

... because the program has tried to write uninitialised junk from -the malloc'd block to the standard output. - - -

2.6.6  Warning messages you might see

- -Most of these only appear if you run in verbose mode (enabled by --v): -
    -
  • More than 50 errors detected. Subsequent errors - will still be recorded, but in less detail than before. -
    - After 50 different errors have been shown, Valgrind becomes - more conservative about collecting them. It then requires only - the program counters in the top two stack frames to match when - deciding whether or not two errors are really the same one. - Prior to this point, the PCs in the top four frames are required - to match. This hack has the effect of slowing down the - appearance of new errors after the first 50. The 50 constant can - be changed by recompiling Valgrind. -

    -

  • More than 300 errors detected. I'm not reporting any more. - Final error counts may be inaccurate. Go fix your - program! -
    - After 300 different errors have been detected, Valgrind ignores - any more. It seems unlikely that collecting even more different - ones would be of practical help to anybody, and it avoids the - danger that Valgrind spends more and more of its time comparing - new errors against an ever-growing collection. As above, the 300 - number is a compile-time constant. -

    -

  • Warning: client switching stacks? -
    - Valgrind spotted such a large change in the stack pointer, %esp, - that it guesses the client is switching to a different stack. - At this point it makes a kludgey guess where the base of the new - stack is, and sets memory permissions accordingly. You may get - many bogus error messages following this, if Valgrind guesses - wrong. At the moment "large change" is defined as a change of - more than 2000000 in the value of the %esp (stack pointer) - register. -

    -

  • Warning: client attempted to close Valgrind's logfile fd <number> - -
    - Valgrind doesn't allow the client - to close the logfile, because you'd never see any diagnostic - information after that point. If you see this message, - you may want to use the --logfile-fd=<number> - option to specify a different logfile file-descriptor number. -

    -

  • Warning: noted but unhandled ioctl <number> -
    - Valgrind observed a call to one of the vast family of - ioctl system calls, but did not modify its - memory status info (because I have not yet got round to it). - The call will still have gone through, but you may get spurious - errors after this as a result of the non-update of the memory info. -

    -

  • Warning: set address range perms: large range <number> -
    - Diagnostic message, mostly for my benefit, to do with memory - permissions. -
- - - -

2.7  Writing suppressions files

- -A suppression file describes a bunch of errors which, for one reason -or another, you don't want Valgrind to tell you about. Usually the -reason is that the system libraries are buggy but unfixable, at least -within the scope of the current debugging session. Multiple -suppressions files are allowed. By default, Valgrind uses -$PREFIX/lib/valgrind/default.supp. - -

-You can ask to add suppressions from another file, by specifying ---suppressions=/path/to/file.supp. - -

Each suppression has the following components:
-

    - -
  • Its name. This merely gives a handy name to the suppression, by - which it is referred to in the summary of used suppressions - printed out when a program finishes. It's not important what - the name is; any identifying string will do. -

    - -

  • The nature of the error to suppress. Either: - Value1, - Value2, - Value4 or - Value8, - meaning an uninitialised-value error when - using a value of 1, 2, 4 or 8 bytes. - Or - Cond (or its old name, Value0), - meaning use of an uninitialised CPU condition code. Or: - Addr1, - Addr2, - Addr4 or - Addr8, meaning an invalid address during a - memory access of 1, 2, 4 or 8 bytes respectively. Or - Param, - meaning an invalid system call parameter error. Or - Free, meaning an invalid or mismatching free. - Or PThread, meaning any kind of complaint to do - with the PThreads API.

  • -

    - -

  • The "immediate location" specification. For Value and Addr - errors, is either the name of the function in which the error - occurred, or, failing that, the full path to the .so file - containing the error location. For Param errors, is the name of - the offending system call parameter. For Free errors, is the - name of the function doing the freeing (eg, free, - __builtin_vec_delete, etc)

  • -

    - -

  • The caller of the above "immediate location". Again, either a - function or shared-object name.

  • -

    - -

  • Optionally, one or two extra calling-function or object names, - for greater precision.
  • -
- -

-Locations may be either names of shared objects or wildcards matching -function names. They begin obj: and fun: -respectively. Function and object names to match against may use the -wildcard characters * and ?. - -A suppression only suppresses an error when the error matches all the -details in the suppression. Here's an example: -

-  {
-    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
-    Value4
-    fun:__gconv_transform_ascii_internal
-    fun:__mbr*toc
-    fun:mbtowc
-  }
-
- -

What this means is: suppress a use-of-uninitialised-value error, when -the data size is 4, when it occurs in the function -__gconv_transform_ascii_internal, when that is called -from any function of name matching __mbr*toc, -when that is called from -mbtowc. It doesn't apply under any other circumstances. -The string by which this suppression is identified to the user is -__gconv_transform_ascii_internal/__mbrtowc/mbtowc. - -

Another example: -

-  {
-    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
-    Value4
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libXaw.so.7.0
-  }
-
- -

Suppress any size 4 uninitialised-value error which occurs anywhere -in libX11.so.6.2, when called from anywhere in the same -library, when called from anywhere in libXaw.so.7.0. The -inexact specification of locations is regrettable, but is about all -you can hope for, given that the X11 libraries shipped with Red Hat -7.2 have had their symbol tables removed. - -

Note -- since the above two examples did not make it clear -- that -you can freely mix the obj: and fun: -styles of description within a single suppression record. - - - -

2.8  The Client Request mechanism

- -Valgrind has a trapdoor mechanism via which the client program can -pass all manner of requests and queries to Valgrind. Internally, this -is used extensively to make malloc, free, signals, threads, etc, work, -although you don't see that. -

-For your convenience, a subset of these so-called client requests is -provided to allow you to tell Valgrind facts about the behaviour of -your program, and conversely to make queries. In particular, your -program can tell Valgrind about changes in memory range permissions -that Valgrind would not otherwise know about, and so allows clients to -get Valgrind to do arbitrary custom checks. -

-Clients need to include the header file valgrind.h to -make this work. The macros therein have the magical property that -they generate code in-line which Valgrind can spot. However, the code -does nothing when not run on Valgrind, so you are not forced to run -your program on Valgrind just because you use the macros in this file. -Also, you are not required to link your program with any extra -supporting libraries. -

-A brief description of the available macros: -

    -
  • VALGRIND_MAKE_NOACCESS, - VALGRIND_MAKE_WRITABLE and - VALGRIND_MAKE_READABLE. These mark address - ranges as completely inaccessible, accessible but containing - undefined data, and accessible and containing defined data, - respectively. Subsequent errors may have their faulting - addresses described in terms of these blocks. Returns a - "block handle". Returns zero when not run on Valgrind. -

    -

  • VALGRIND_DISCARD: At some point you may want - Valgrind to stop reporting errors in terms of the blocks - defined by the previous three macros. To do this, the above - macros return a small-integer "block handle". You can pass - this block handle to VALGRIND_DISCARD. After - doing so, Valgrind will no longer be able to relate - addressing errors to the user-defined block associated with - the handle. The permissions settings associated with the - handle remain in place; this just affects how errors are - reported, not whether they are reported. Returns 1 for an - invalid handle and 0 for a valid handle (although passing - invalid handles is harmless). Always returns 0 when not run - on Valgrind. -

    -

  • VALGRIND_CHECK_NOACCESS, - VALGRIND_CHECK_WRITABLE and - VALGRIND_CHECK_READABLE: check immediately - whether or not the given address range has the relevant - property, and if not, print an error message. Also, for the - convenience of the client, returns zero if the relevant - property holds; otherwise, the returned value is the address - of the first byte for which the property is not true. - Always returns 0 when not run on Valgrind. -

    -

  • VALGRIND_CHECK_DEFINED: a quick and easy way - to find out whether Valgrind thinks a particular variable - (lvalue, to be precise) is addressable and defined. Prints - an error message if not. Returns no value. -

    -

  • VALGRIND_MAKE_NOACCESS_STACK: a highly - experimental feature. Similarly to - VALGRIND_MAKE_NOACCESS, this marks an address - range as inaccessible, so that subsequent accesses to an - address in the range give an error. However, this macro - does not return a block handle. Instead, all annotations - created like this are reviewed at each client - ret (subroutine return) instruction, and those - which now define an address range below the client's stack - pointer register (%esp) are automatically - deleted. -

    - In other words, this macro allows the client to tell - Valgrind about red-zones on its own stack. Valgrind - automatically discards this information when the stack - retreats past such blocks. Beware: hacky and flaky, and - probably interacts badly with the new pthread support. -

    -

  • RUNNING_ON_VALGRIND: returns 1 if running on - Valgrind, 0 if running on the real CPU. -

    -

  • VALGRIND_DO_LEAK_CHECK: run the memory leak detector - right now. Returns no value. I guess this could be used to - incrementally check for leaks between arbitrary places in the - program's execution. Warning: not properly tested! -

    -

  • VALGRIND_DISCARD_TRANSLATIONS: discard translations - of code in the specified address range. Useful if you are - debugging a JITter or some other dynamic code generation system. - After this call, attempts to execute code in the invalidated - address range will cause valgrind to make new translations of that - code, which is probably the semantics you want. Note that this is - implemented naively, and involves checking all 200191 entries in - the translation table to see if any of them overlap the specified - address range. So try not to call it often, or performance will - nosedive. Note that you can be clever about this: you only need - to call it when an area which previously contained code is - overwritten with new code. You can choose to write code into - fresh memory, and just call this occasionally to discard large - chunks of old code all at once. -

    - Warning: minimally tested, especially for the cache simulator. -

-

- - - -

2.9  Support for POSIX Pthreads

- -As of late April 02, Valgrind supports programs which use POSIX -pthreads. Doing this has proved technically challenging but is now -mostly complete. It works well enough for significant threaded -applications to work. -

-It works as follows: threaded apps are (dynamically) linked against -libpthread.so. Usually this is the one installed with -your Linux distribution. Valgrind, however, supplies its own -libpthread.so and automatically connects your program to -it instead. -

-The fake libpthread.so and Valgrind cooperate to -implement a user-space pthreads package. This approach avoids the -horrible implementation problems of implementing a truly -multiprocessor version of Valgrind, but it does mean that threaded -apps run only on one CPU, even if you have a multiprocessor machine. -

-Valgrind schedules your threads in a round-robin fashion, with all -threads having equal priority. It switches threads every 50000 basic -blocks (typically around 300000 x86 instructions), which means you'll -get a much finer interleaving of thread executions than when run -natively. This in itself may cause your program to behave differently -if you have some kind of concurrency, critical race, locking, or -similar, bugs. -

-The current (valgrind-1.0 release) state of pthread support is as -follows: -

    -
  • Mutexes, condition variables, thread-specific data, - pthread_once, reader-writer locks, semaphores, - cleanup stacks, cancellation and thread detaching currently work. - Various attribute-like calls are handled but ignored; you get a - warning message. -

    -

  • Currently the following syscalls are thread-safe (nonblocking): - write read nanosleep - sleep select poll - recvmsg and - accept. -

    -

  • Signals in pthreads are now handled properly(ish): - pthread_sigmask, pthread_kill, - sigwait and raise are now implemented. - Each thread has its own signal mask, as POSIX requires. - It's a bit kludgey -- there's a system-wide pending signal set, - rather than one for each thread. But hey. -
- - -As of 18 May 02, the following threaded programs now work fine on my -RedHat 7.2 box: Opera 6.0Beta2, KNode in KDE 3.0, Mozilla-0.9.2.1 and -Galeon-0.11.3, both as supplied with RedHat 7.2. Also Mozilla 1.0RC2. -OpenOffice 1.0. MySQL 3.something (the current stable release). - - -

2.10  Building and installing

- -We now use the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. I don't think there is much else to say. -There are no options apart from the usual --prefix that -you should give to ./configure. - -

-The configure script tests the version of the X server -currently indicated by the current $DISPLAY. This is a -known bug. The intention was to detect the version of the current -XFree86 client libraries, so that correct suppressions could be -selected for them, but instead the test checks the server version. -This is just plain wrong. - -

-If you are building a binary package of Valgrind for distribution, -please read README_PACKAGERS. It contains some important -information. - -

-Apart from that there is no excitement here. Let me know if you have -build problems. - - - - -

2.11  If you have problems

-Mail me (jseward@acm.org). - -

See Section 4 for the known limitations of -Valgrind, and for a list of programs which are known not to work on -it. - -

The translator/instrumentor has a lot of assertions in it. They -are permanently enabled, and I have no plans to disable them. If one -of these breaks, please mail me! - -

If you get an assertion failure on the expression -chunkSane(ch) in vg_free() in -vg_malloc.c, this may have happened because your program -wrote off the end of a malloc'd block, or before its beginning. -Valgrind should have emitted a proper message to that effect before -dying in this way. This is a known problem which I should fix. -

- -


- - -

3  Details of the checking machinery

- -Read this section if you want to know, in detail, exactly what and how -Valgrind is checking. - - -

3.1  Valid-value (V) bits

- -It is simplest to think of Valgrind implementing a synthetic Intel x86 -CPU which is identical to a real CPU, except for one crucial detail. -Every bit (literally) of data processed, stored and handled by the -real CPU has, in the synthetic CPU, an associated "valid-value" bit, -which says whether or not the accompanying bit has a legitimate value. -In the discussions which follow, this bit is referred to as the V -(valid-value) bit. - -

Each byte in the system therefore has 8 V bits which follow -it wherever it goes. For example, when the CPU loads a word-size item -(4 bytes) from memory, it also loads the corresponding 32 V bits from -a bitmap which stores the V bits for the process' entire address -space. If the CPU should later write the whole or some part of that -value to memory at a different address, the relevant V bits will be -stored back in the V-bit bitmap. - -

In short, each bit in the system has an associated V bit, which -follows it around everywhere, even inside the CPU. Yes, the CPU's -(integer and %eflags) registers have their own V bit -vectors. - -

Copying values around does not cause Valgrind to check for, or -report on, errors. However, when a value is used in a way which might -conceivably affect the outcome of your program's computation, the -associated V bits are immediately checked. If any of these indicate -that the value is undefined, an error is reported. - -

Here's an (admittedly nonsensical) example: -

-  int i, j;
-  int a[10], b[10];
-  for (i = 0; i < 10; i++) {
-    j = a[i];
-    b[i] = j;
-  }
-
- -

Valgrind emits no complaints about this, since it merely copies -uninitialised values from a[] into b[], and -doesn't use them in any way. However, if the loop is changed to -

-  for (i = 0; i < 10; i++) {
-    j += a[i];
-  }
-  if (j == 77) 
-     printf("hello there\n");
-
-then Valgrind will complain, at the if, that the -condition depends on uninitialised values. - -

Most low level operations, such as adds, cause Valgrind to -use the V bits for the operands to calculate the V bits for the -result. Even if the result is partially or wholly undefined, -it does not complain. - -

Checks on definedness only occur in two places: when a value is -used to generate a memory address, and where a control flow decision -needs to be made. Also, when a system call is detected, valgrind -checks definedness of parameters as required. - -

If a check should detect undefinedness, an error message is -issued. The resulting value is subsequently regarded as well-defined. -To do otherwise would give long chains of error messages. In effect, -we say that undefined values are non-infectious. - -

This sounds overcomplicated. Why not just check all reads from -memory, and complain if an undefined value is loaded into a CPU register? -Well, that doesn't work well, because perfectly legitimate C programs routinely -copy uninitialised values around in memory, and we don't want endless complaints -about that. Here's the canonical example. Consider a struct -like this: -

-  struct S { int x; char c; };
-  struct S s1, s2;
-  s1.x = 42;
-  s1.c = 'z';
-  s2 = s1;
-
- -

The question to ask is: how large is struct S, in -bytes? An int is 4 bytes and a char one byte, so perhaps a struct S -occupies 5 bytes? Wrong. All (non-toy) compilers I know of will -round the size of struct S up to a whole number of words, -in this case 8 bytes. Not doing this forces compilers to generate -truly appalling code for subscripting arrays of struct -S's. - -

So s1 occupies 8 bytes, yet only 5 of them will be initialised. -For the assignment s2 = s1, gcc generates code to copy -all 8 bytes wholesale into s2 without regard for their -meaning. If Valgrind simply checked values as they came out of -memory, it would yelp every time a structure assignment like this -happened. So the more complicated semantics described above is -necessary. This allows gcc to copy s1 into -s2 any way it likes, and a warning will only be emitted -if the uninitialised values are later used. - -

One final twist to this story. The above scheme allows garbage to -pass through the CPU's integer registers without complaint. It does -this by giving the integer registers V tags, passing these around in -the expected way. This is complicated and computationally expensive to -do, but is necessary. Valgrind is more simplistic about -floating-point loads and stores. In particular, V bits for data read -as a result of floating-point loads are checked at the load -instruction. So if your program uses the floating-point registers to -do memory-to-memory copies, you will get complaints about -uninitialised values. Fortunately, I have not yet encountered a -program which (ab)uses the floating-point registers in this way. - - -

3.2  Valid-address (A) bits

- -Notice that the previous section describes how the validity of values -is established and maintained without having to say whether the -program does or does not have the right to access any particular -memory location. We now consider the latter issue. - -

As described above, every bit in memory or in the CPU has an -associated valid-value (V) bit. In addition, all bytes in memory, but -not in the CPU, have an associated valid-address (A) bit. This -indicates whether or not the program can legitimately read or write -that location. It does not give any indication of the validity of the -data at that location -- that's the job of the V bits -- only whether -or not the location may be accessed. - -

Every time your program reads or writes memory, Valgrind checks the -A bits associated with the address. If any of them indicate an -invalid address, an error is emitted. Note that the reads and writes -themselves do not change the A bits, only consult them. - -

So how do the A bits get set/cleared? Like this: - -

    -
  • When the program starts, all the global data areas are marked as - accessible.

  • -

    - -

  • When the program does malloc/new, the A bits for exactly the - area allocated, and not a byte more, are marked as accessible. - Upon freeing the area the A bits are changed to indicate - inaccessibility.

  • -

    - -

  • When the stack pointer register (%esp) moves up or down, A bits - are set. The rule is that the area from %esp up to the base of - the stack is marked as accessible, and below %esp is - inaccessible. (If that sounds illogical, bear in mind that the - stack grows down, not up, on almost all Unix systems, including - GNU/Linux.) Tracking %esp like this has the useful side-effect - that the section of stack used by a function for local variables - etc is automatically marked accessible on function entry and - inaccessible on exit.

  • -

    - -

  • When doing system calls, A bits are changed appropriately. For - example, mmap() magically makes files appear in the process's - address space, so the A bits must be updated if mmap() - succeeds.

  • -

    - -

  • Optionally, your program can tell Valgrind about such changes - explicitly, using the client request mechanism described above. -
- - - -

3.3  Putting it all together

-Valgrind's checking machinery can be summarised as follows: - -
    -
  • Each byte in memory has 8 associated V (valid-value) bits, - saying whether or not the byte has a defined value, and a single - A (valid-address) bit, saying whether or not the program - currently has the right to read/write that address.

  • -

    - -

  • When memory is read or written, the relevant A bits are - consulted. If they indicate an invalid address, Valgrind emits - an Invalid read or Invalid write error.

  • -

    - -

  • When memory is read into the CPU's integer registers, the - relevant V bits are fetched from memory and stored in the - simulated CPU. They are not consulted.

  • -

    - -

  • When an integer register is written out to memory, the V bits - for that register are written back to memory too.

  • -

    - -

  • When memory is read into the CPU's floating point registers, the - relevant V bits are read from memory and they are immediately - checked. If any are invalid, an uninitialised value error is - emitted. This precludes using the floating-point registers to - copy possibly-uninitialised memory, but simplifies Valgrind in - that it does not have to track the validity status of the - floating-point registers.

  • -

    - -

  • As a result, when a floating-point register is written to - memory, the associated V bits are set to indicate a valid - value.

  • -

    - -

  • When values in integer CPU registers are used to generate a - memory address, or to determine the outcome of a conditional - branch, the V bits for those values are checked, and an error - emitted if any of them are undefined.

  • -

    - -

  • When values in integer CPU registers are used for any other - purpose, Valgrind computes the V bits for the result, but does - not check them.

  • -

    - -

  • Once the V bits for a value in the CPU have been checked, they - are then set to indicate validity. This avoids long chains of - errors.

  • -

    - -

  • When values are loaded from memory, valgrind checks the A bits - for that location and issues an illegal-address warning if - needed. In that case, the V bits loaded are forced to indicate - Valid, despite the location being invalid. -

    - This apparently strange choice reduces the amount of confusing - information presented to the user. It avoids the - unpleasant phenomenon in which memory is read from a place which - is both unaddressible and contains invalid values, and, as a - result, you get not only an invalid-address (read/write) error, - but also a potentially large set of uninitialised-value errors, - one for every time the value is used. -

    - There is a hazy boundary case to do with multi-byte loads from - addresses which are partially valid and partially invalid. See - the description of the flag --partial-loads-ok for details. -


  • -
- -Valgrind intercepts calls to malloc, calloc, realloc, valloc, -memalign, free, new and delete. The behaviour you get is: - -
    - -
  • malloc/new: the returned memory is marked as addressible but not - having valid values. This means you have to write on it before - you can read it.

  • -

    - -

  • calloc: returned memory is marked both addressible and valid, - since calloc() clears the area to zero.

  • -

    - -

  • realloc: if the new size is larger than the old, the new section - is addressible but invalid, as with malloc.

  • -

    - -

  • If the new size is smaller, the dropped-off section is marked as - unaddressible. You may only pass to realloc a pointer - previously issued to you by malloc/calloc/new/realloc.

  • -

    - -

  • free/delete: you may only pass to free a pointer previously - issued to you by malloc/calloc/new/realloc, or the value - NULL. Otherwise, Valgrind complains. If the pointer is indeed - valid, Valgrind marks the entire area it points at as - unaddressible, and places the block in the freed-blocks-queue. - The aim is to defer as long as possible reallocation of this - block. Until that happens, all attempts to access it will - elicit an invalid-address error, as you would hope.

  • -
- - - - -

3.4  Signals

- -Valgrind provides suitable handling of signals, so, provided you stick -to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() -are handled. Signal handlers may return in the normal way or do -longjmp(); both should work ok. As specified by POSIX, a signal is -blocked in its own handler. Default actions for signals should work -as before. Etc, etc. - -

Under the hood, dealing with signals is a real pain, and Valgrind's -simulation leaves much to be desired. If your program does -way-strange stuff with signals, bad things may happen. If so, let me -know. I don't promise to fix it, but I'd at least like to be aware of -it. - - - -

3.5  Memory leak detection

- -Valgrind keeps track of all memory blocks issued in response to calls -to malloc/calloc/realloc/new. So when the program exits, it knows -which blocks are still outstanding -- have not been returned, in other -words. Ideally, you want your program to have no blocks still in use -at exit. But many programs do. - -

For each such block, Valgrind scans the entire address space of the -process, looking for pointers to the block. One of three situations -may result: - -

    -
  • A pointer to the start of the block is found. This usually - indicates programming sloppiness; since the block is still - pointed at, the programmer could, at least in principle, have free'd - it before program exit.

  • -

    - -

  • A pointer to the interior of the block is found. The pointer - might originally have pointed to the start and have been moved - along, or it might be entirely unrelated. Valgrind deems such a - block as "dubious", that is, possibly leaked, - because it's unclear whether or - not a pointer to it still exists.

  • -

    - -

  • The worst outcome is that no pointer to the block can be found. - The block is classified as "leaked", because the - programmer could not possibly have free'd it at program exit, - since no pointer to it exists. This might be a symptom of - having lost the pointer at some earlier point in the - program.
  • -
- -Valgrind reports summaries about leaked and dubious blocks. -For each such block, it will also tell you where the block was -allocated. This should help you figure out why the pointer to it has -been lost. In general, you should attempt to ensure your programs do -not have any leaked or dubious blocks at exit. - -

The precise area of memory in which Valgrind searches for pointers -is: all naturally-aligned 4-byte words for which all A bits indicate -addressibility and all V bits indicate that the stored value is -actually valid. - -


- - - -

4  Limitations

- -The following list of limitations seems depressingly long. However, -most programs actually work fine. - -

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on -a kernel 2.2.X or 2.4.X system, subject to the following constraints: - -

    -
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator - encounters these, Valgrind will simply give up. It may be - possible to add support for them at a later time. Intel added a - few instructions such as "cmov" to the integer instruction set - on Pentium and later processors, and these are supported. - Nevertheless it's safest to think of Valgrind as implementing - the 486 instruction set.

  • -

    - -

  • Pthreads support is improving, but there are still significant - limitations in that department. See the section above on - Pthreads. Note that your program must be dynamically linked - against libpthread.so, so that Valgrind can - substitute its own implementation at program startup time. If - you're statically linked against it, things will fail - badly.

  • -

    - -

  • Valgrind assumes that the floating point registers are not used - as intermediaries in memory-to-memory copies, so it immediately - checks V bits in floating-point loads/stores. If you want to - write code which copies around possibly-uninitialised values, - you must ensure these travel through the integer registers, not - the FPU.

  • -

    - -

  • If your program does its own memory management, rather than - using malloc/new/free/delete, it should still work, but - Valgrind's error checking won't be so effective.

  • -

    - -

  • Valgrind's signal simulation is not as robust as it could be. - Basic POSIX-compliant sigaction and sigprocmask functionality is - supplied, but it's conceivable that things could go badly awry - if you do weird things with signals. Workaround: don't. - Programs that do non-POSIX signal tricks are in any case - inherently unportable, so should be avoided if - possible.

  • -

    - -

  • Programs which switch stacks are not well handled. Valgrind - does have support for this, but I don't have great faith in it. - It's difficult -- there's no cast-iron way to decide whether a - large change in %esp is as a result of the program switching - stacks, or merely allocating a large object temporarily on the - current stack -- yet Valgrind needs to handle the two situations - differently. 1 May 02: this probably interacts badly with the - new pthread support. I haven't checked properly.

  • -

    - -

  • x86 instructions, and system calls, have been implemented on - demand. So it's possible, although unlikely, that a program - will fall over with a message to that effect. If this happens, - please mail me ALL the details printed out, so I can try and - implement the missing feature.

  • -

    - -

  • x86 floating point works correctly, but floating-point code may - run even more slowly than integer code, due to my simplistic - approach to FPU emulation.

  • -

    - -

  • You can't Valgrind-ize statically linked binaries. Valgrind - relies on the dynamic-link mechanism to gain control at - startup.

  • -

    - -

  • Memory consumption of your program is majorly increased whilst - running under Valgrind. This is due to the large amount of - administrative information maintained behind the scenes. Another - cause is that Valgrind dynamically translates the original - executable. Translated, instrumented code is 14-16 times larger - than the original (!) so you can easily end up with 30+ MB of - translations when running (eg) a web browser. -
  • -
- -Programs which are known not to work are: - -
    -
  • emacs starts up but immediately concludes it is out of memory - and aborts. Emacs has its own memory-management scheme, but I - don't understand why this should interact so badly with - Valgrind. Emacs works fine if you build it to use the standard - malloc/free routines.

  • -

    -

- -Known platform-specific limitations, as of release 1.0.0: - -
    -
  • On Red Hat 7.3, there have been reports of link errors (at - program start time) for threaded programs using - __pthread_clock_gettime and - __pthread_clock_settime. This appears to be due to - /lib/librt-2.2.5.so needing them. Unfortunately I - do not understand enough about this problem to fix it properly, - and I can't reproduce it on my test RedHat 7.3 system. Please - mail me if you have more information / understanding.

  • -

    -

  • - 1.0.0 now partially works on Red Hat 7.3.92 ("Limbo" - public beta). However, don't expect a smooth ride. - Basically valgrind won't work as-is with any - glibc-2.3 based system. Limbo is just a little pre glibc-2.3 - and it just about works. Limbo is also gcc-3.1 based and so - suffers from the problems in the following point.

  • -

    -

  • - Inlining of string functions with gcc-3.1 or above causes a - large number of false reports of uninitialised value uses. I - know what the problem is and roughly how to fix it, but I need - to devise a reasonably efficient fix. Try to reduce the - optimisation level, or use -fno-builtin-strlen in - the meantime. Or use an earlier gcc.

  • -

    -

- - -


- - - -

5  How it works -- a rough overview

-Some gory details, for those with a passion for gory details. You -don't need to read this section if all you want to do is use Valgrind. - - -

5.1  Getting started

- -Valgrind is compiled into a shared object, valgrind.so. The shell -script valgrind sets the LD_PRELOAD environment variable to point to -valgrind.so. This causes the .so to be loaded as an extra library to -any subsequently executed dynamically-linked ELF binary, viz, the -program you want to debug. - -

The dynamic linker allows each .so in the process image to have an -initialisation function which is run before main(). It also allows -each .so to have a finalisation function run after main() exits. - -

When valgrind.so's initialisation function is called by the dynamic -linker, the synthetic CPU starts up. The real CPU remains locked -in valgrind.so for the entire rest of the program, but the synthetic -CPU returns from the initialisation function. Startup of the program -now continues as usual -- the dynamic linker calls all the other .so's -initialisation routines, and eventually runs main(). This all runs on -the synthetic CPU, not the real one, but the client program cannot -tell the difference. - -

Eventually main() exits, so the synthetic CPU calls valgrind.so's -finalisation function. Valgrind detects this, and uses it as its cue -to exit. It prints summaries of all errors detected, possibly checks -for memory leaks, and then exits the finalisation routine, but now on -the real CPU. The synthetic CPU has now lost control -- permanently --- so the program exits back to the OS on the real CPU, just as it -would have done anyway. - -

On entry, Valgrind switches stacks, so it runs on its own stack. -On exit, it switches back. This means that the client program -continues to run on its own stack, so we can switch back and forth -between running it on the simulated and real CPUs without difficulty. -This was an important design decision, because it makes it easy (well, -significantly less difficult) to debug the synthetic CPU. - - - -

5.2  The translation/instrumentation engine

- -Valgrind does not directly run any of the original program's code. Only -instrumented translations are run. Valgrind maintains a translation -table, which allows it to find the translation quickly for any branch -target (code address). If no translation has yet been made, the -translator - a just-in-time translator - is summoned. This makes an -instrumented translation, which is added to the collection of -translations. Subsequent jumps to that address will use this -translation. - -

Valgrind no longer directly supports detection of self-modifying -code. Such checking is expensive, and in practice (fortunately) -almost no applications need it. However, to help people who are -debugging dynamic code generation systems, there is a Client Request -(basically a macro you can put in your program) which directs Valgrind -to discard translations in a given address range. So Valgrind can -still work in this situation provided the client tells it when -code has become out-of-date and needs to be retranslated. - -

The JITter translates basic blocks -- blocks of straight-line-code --- as single entities. To minimise the considerable difficulties of -dealing with the x86 instruction set, x86 instructions are first -translated to a RISC-like intermediate code, similar to sparc code, -but with an infinite number of virtual integer registers. Initially -each insn is translated separately, and there is no attempt at -instrumentation. - -

The intermediate code is improved, mostly so as to try and cache -the simulated machine's registers in the real machine's registers over -several simulated instructions. This is often very effective. Also, -we try to remove redundant updates of the simulated machine's -condition-code register. - -

The intermediate code is then instrumented, giving more -intermediate code. There are a few extra intermediate-code operations -to support instrumentation; it is all refreshingly simple. After -instrumentation there is a cleanup pass to remove redundant value -checks. - -

This gives instrumented intermediate code which mentions arbitrary -numbers of virtual registers. A linear-scan register allocator is -used to assign real registers and possibly generate spill code. All -of this is still phrased in terms of the intermediate code. This -machinery is inspired by the work of Reuben Thomas (MITE). - -

Then, and only then, is the final x86 code emitted. The -intermediate code is carefully designed so that x86 code can be -generated from it without need for spare registers or other -inconveniences. - -

The translations are managed using a traditional LRU-based caching -scheme. The translation cache has a default size of about 14MB. - - - -

5.3  Tracking the status of memory

Each byte in the -process' address space has nine bits associated with it: one A bit and -eight V bits. The A and V bits for each byte are stored using a -sparse array, which flexibly and efficiently covers arbitrary parts of -the 32-bit address space without imposing significant space or -performance overheads for the parts of the address space never -visited. The scheme used, and speedup hacks, are described in detail -at the top of the source file vg_memory.c, so you should read that for -the gory details. - - - -

5.4 System calls

-All system calls are intercepted. The memory status map is consulted -before and updated after each call. It's all rather tiresome. See -vg_syscall_mem.c for details. - - - -

5.5  Signals

-All system calls to sigaction() and sigprocmask() are intercepted. If -the client program is trying to set a signal handler, Valgrind makes a -note of the handler address and which signal it is for. Valgrind then -arranges for the same signal to be delivered to its own handler. - -

When such a signal arrives, Valgrind's own handler catches it, and -notes the fact. At a convenient safe point in execution, Valgrind -builds a signal delivery frame on the client's stack and runs its -handler. If the handler longjmp()s, there is nothing more to be said. -If the handler returns, Valgrind notices this, zaps the delivery -frame, and carries on where it left off before delivering the signal. - -

The purpose of this nonsense is that setting signal handlers -essentially amounts to giving callback addresses to the Linux kernel. -We can't allow this to happen, because if it did, signal handlers -would run on the real CPU, not the simulated one. This means the -checking machinery would not operate during the handler run, and, -worse, memory permissions maps would not be updated, which could cause -spurious error reports once the handler had returned. - -

An even worse thing would happen if the signal handler longjmp'd -rather than returned: Valgrind would completely lose control of the -client program. - -

Upshot: we can't allow the client to install signal handlers -directly. Instead, Valgrind must catch, on behalf of the client, any -signal the client asks to catch, and must deliver it to the client on -the simulated CPU, not the real one. This involves considerable -gruesome fakery; see vg_signals.c for details. -

- -


- - -

6  Example

-This is the log for a run of a small program. The program is in fact -correct, and the reported error is as the result of a potentially serious -code generation bug in GNU g++ (snapshot 20010527). -
-sewardj@phoenix:~/newmat10$
-~/Valgrind-6/valgrind -v ./bogon 
-==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
-==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
-==25832== Startup, with flags:
-==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
-==25832== reading syms from /lib/ld-linux.so.2
-==25832== reading syms from /lib/libc.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
-==25832== reading syms from /lib/libm.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
-==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
-==25832== reading syms from /proc/self/exe
-==25832== loaded 5950 symbols, 142333 line number locations
-==25832== 
-==25832== Invalid read of size 4
-==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
-==25832==    by 0x80487AF: main (bogon.cpp:66)
-==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-==25832==
-==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
-==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
-==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
-==25832== For a detailed leak analysis, rerun with: --leak-check=yes
-==25832==
-==25832== exiting, did 1881 basic blocks, 0 misses.
-==25832== 223 translations, 3626 bytes in, 56801 bytes out.
-
-

The GCC folks fixed this about a week before gcc-3.0 shipped. -


-

- - - - -

7  Cache profiling

-As well as memory debugging, Valgrind also allows you to do cache simulations -and annotate your source line-by-line with the number of cache misses. In -particular, it records: -
    -
  • L1 instruction cache reads and misses; -
  • L1 data cache reads and read misses, writes and write misses; -
  • L2 unified cache reads and read misses, writes and write misses. -
-On a modern x86 machine, an L1 miss will typically cost around 10 cycles, -and an L2 miss can cost as much as 200 cycles. Detailed cache profiling can be -very useful for improving the performance of your program.

- -Also, since one instruction cache read is performed per instruction executed, -you can find out how many instructions are executed per line, which can be -useful for traditional profiling and test coverage.

- -Any feedback, bug-fixes, suggestions, etc, welcome. - - -

7.1  Overview

-First off, as for normal Valgrind use, you probably want to turn on debugging -info (the -g flag). But by contrast with normal Valgrind use, you -probably do want to turn optimisation on, since you should profile your -program as it will be normally run. - -The two steps are: -
    -
  1. Run your program with cachegrind in front of the - normal command line invocation. When the program finishes, - Valgrind will print summary cache statistics. It also collects - line-by-line information in a file cachegrind.out. -

    - This step should be done every time you want to collect - information about a new program, a changed program, or about the - same program with different input. -

  2. -

    -

  3. Generate a function-by-function summary, and possibly annotate - source files with 'vg_annotate'. Source files to annotate can be - specified manually on the command line, or - "interesting" source files can be annotated automatically with - the --auto=yes option. You can annotate C/C++ - files or assembly language files equally easily. -

    - This step can be performed as many times as you like for each - Step 2. You may want to do multiple annotations showing - different information each time.

    -

  4. -
- -The steps are described in detail in the following sections.

- - -

7.2  Cache simulation specifics

- -Cachegrind uses a simulation for a machine with a split L1 cache and a unified -L2 cache. This configuration is used for all (modern) x86-based machines we -are aware of. Old Cyrix CPUs had a unified I and D L1 cache, but they are -ancient history now.

- -The more specific characteristics of the simulation are as follows. - -

    -
  • Write-allocate: when a write miss occurs, the block written to - is brought into the D1 cache. Most modern caches have this - property.
  • - -

  • Bit-selection hash function: the line(s) in the cache to which a - memory block maps is chosen by the middle bits M--(M+N-1) of the - byte address, where: -
      -
    •  line size = 2^M bytes 
    • -
    • (cache size / line size) = 2^N bytes
    • -
  • - -

  • Inclusive L2 cache: the L2 cache replicates all the entries of - the L1 cache. This is standard on Pentium chips, but AMD - Athlons use an exclusive L2 cache that only holds blocks evicted - from L1. Ditto AMD Durons and most modern VIAs.
  • -

- -The cache configuration simulated (cache size, associativity and line size) is -determined automagically using the CPUID instruction. If you have an old -machine that (a) doesn't support the CPUID instruction, or (b) supports it in -an early incarnation that doesn't give any cache information, then Cachegrind -will fall back to using a default configuration (that of a model 3/4 Athlon). -Cachegrind will tell you if this happens. You can manually specify one, two or -all three levels (I1/D1/L2) of the cache from the command line using the ---I1, --D1 and --L2 options.

- -Other noteworthy behaviour: - -

    -
  • References that straddle two cache lines are treated as follows: -
      -
    • If both blocks hit --> counted as one hit
    • -
    • If one block hits, the other misses --> counted as one miss
    • -
    • If both blocks miss --> counted as one miss (not two)
    • -

  • - -
  • Instructions that modify a memory location (eg. inc and - dec) are counted as doing just a read, ie. a single data - reference. This may seem strange, but since the write can never cause a - miss (the read guarantees the block is in the cache) it's not very - interesting.

    - - Thus it measures not the number of times the data cache is accessed, but - the number of times a data cache miss could occur.

    -

  • -
- -If you are interested in simulating a cache with different properties, it is -not particularly hard to write your own cache simulator, or to modify the -existing ones in vg_cachesim_I1.c, vg_cachesim_D1.c, -vg_cachesim_L2.c and vg_cachesim_gen.c. We'd be -interested to hear from anyone who does. - - -

7.3  Profiling programs

- -Cache profiling is enabled by using the --cachesim=yes -option to the valgrind shell script. Alternatively, it -is probably more convenient to use the cachegrind script. -Either way automatically turns off Valgrind's memory checking functions, -since the cache simulation is slow enough already, and you probably -don't want to do both at once. -

-To gather cache profiling information about the program ls --l, type: - -

cachegrind ls -l
- -The program will execute (slowly). Upon completion, summary statistics -that look like this will be printed: - -
-==31751== I   refs:      27,742,716
-==31751== I1  misses:           276
-==31751== L2  misses:           275
-==31751== I1  miss rate:        0.0%
-==31751== L2i miss rate:        0.0%
-==31751== 
-==31751== D   refs:      15,430,290  (10,955,517 rd + 4,474,773 wr)
-==31751== D1  misses:        41,185  (    21,905 rd +    19,280 wr)
-==31751== L2  misses:        23,085  (     3,987 rd +    19,098 wr)
-==31751== D1  miss rate:        0.2% (       0.1%   +       0.4%)
-==31751== L2d miss rate:        0.1% (       0.0%   +       0.4%)
-==31751== 
-==31751== L2 misses:         23,360  (     4,262 rd +    19,098 wr)
-==31751== L2 miss rate:         0.0% (       0.0%   +       0.4%)
-
- -Cache accesses for instruction fetches are summarised first, giving the -number of fetches made (this is the number of instructions executed, which -can be useful to know in its own right), the number of I1 misses, and the -number of L2 instruction (L2i) misses.

- -Cache accesses for data follow. The information is similar to that of the -instruction fetches, except that the values are also shown split between reads -and writes (note each row's rd and wr values add up -to the row's total).

- -Combined instruction and data figures for the L2 cache follow that.

- - -

7.4  Output file

- -As well as printing summary information, Cachegrind also writes -line-by-line cache profiling information to a file named -cachegrind.out. This file is human-readable, but is best -interpreted by the accompanying program vg_annotate, -described in the next section. -

-Things to note about the cachegrind.out file: -

    -
  • It is written every time valgrind --cachesim=yes or - cachegrind is run, and will overwrite any existing - cachegrind.out in the current directory.
  • -

    -

  • It can be huge: ls -l generates a file of about - 350KB. Browsing a few files and web pages with a Konqueror - built with full debugging information generates a file - of around 15 MB.
  • -
- - -

7.5  Cachegrind options

-Cachegrind accepts all the options that Valgrind does, although some of them -(ones related to memory checking) don't do anything when cache profiling.

- -The interesting cache-simulation specific options are: - -

    -
  • --I1=<size>,<associativity>,<line_size>
    - --D1=<size>,<associativity>,<line_size>
    - --L2=<size>,<associativity>,<line_size>

    - [default: uses CPUID for automagic cache configuration]

    - - Manually specifies the I1/D1/L2 cache configuration, where - size and line_size are measured in bytes. The - three items must be comma-separated, but with no spaces, eg: - -

    cachegrind --I1=65536,2,64
    - - You can specify one, two or three of the I1/D1/L2 caches. Any level not - manually specified will be simulated using the configuration found in the - normal way (via the CPUID instruction, or failing that, via defaults). -
- - - -

7.6  Annotating C/C++ programs

- -Before using vg_annotate, it is worth widening your -window to be at least 120-characters wide if possible, as the output -lines can be quite long. -

-To get a function-by-function summary, run vg_annotate in a -directory containing a cachegrind.out file. The output -looks like this: - -

---------------------------------------------------------------------------------
-I1 cache:              65536 B, 64 B, 2-way associative
-D1 cache:              65536 B, 64 B, 2-way associative
-L2 cache:              262144 B, 64 B, 8-way associative
-Command:               concord vg_to_ucode.c
-Events recorded:       Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Events shown:          Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Event sort order:      Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Threshold:             99%
-Chosen for annotation:
-Auto-annotation:       on
-
---------------------------------------------------------------------------------
-Ir         I1mr I2mr Dr         D1mr   D2mr  Dw        D1mw   D2mw
---------------------------------------------------------------------------------
-27,742,716  276  275 10,955,517 21,905 3,987 4,474,773 19,280 19,098  PROGRAM TOTALS
-
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr        D1mr  D2mr  Dw        D1mw   D2mw    file:function
---------------------------------------------------------------------------------
-8,821,482    5    5 2,242,702 1,621    73 1,794,230      0      0  getc.c:_IO_getc
-5,222,023    4    4 2,276,334    16    12   875,959      1      1  concord.c:get_word
-2,649,248    2    2 1,344,810 7,326 1,385         .      .      .  vg_main.c:strcmp
-2,521,927    2    2   591,215     0     0   179,398      0      0  concord.c:hash
-2,242,740    2    2 1,046,612   568    22   448,548      0      0  ctype.c:tolower
-1,496,937    4    4   630,874 9,000 1,400   279,388      0      0  concord.c:insert
-  897,991   51   51   897,831    95    30        62      1      1  ???:???
-  598,068    1    1   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__flockfile
-  598,068    0    0   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__funlockfile
-  598,024    4    4   213,580    35    16   149,506      0      0  vg_clientmalloc.c:malloc
-  446,587    1    1   215,973 2,167   430   129,948 14,057 13,957  concord.c:add_existing
-  341,760    2    2   128,160     0     0   128,160      0      0  vg_clientmalloc.c:vg_trap_here_WRAPPER
-  320,782    4    4   150,711   276     0    56,027     53     53  concord.c:init_hash_table
-  298,998    1    1   106,785     0     0    64,071      1      1  concord.c:create
-  149,518    0    0   149,516     0     0         1      0      0  ???:tolower@@GLIBC_2.0
-  149,518    0    0   149,516     0     0         1      0      0  ???:fgetc@@GLIBC_2.0
-   95,983    4    4    38,031     0     0    34,409  3,152  3,150  concord.c:new_word_node
-   85,440    0    0    42,720     0     0    21,360      0      0  vg_clientmalloc.c:vg_bogus_epilogue
-
- -First up is a summary of the annotation options: - -
    -
  • I1 cache, D1 cache, L2 cache: cache configuration. So you know the - configuration with which these results were obtained.
  • - -

  • Command: the command line invocation of the program under - examination.
  • - -

  • Events recorded: event abbreviations are:

    -

      -
    • Ir : I cache reads (ie. instructions executed)
    • -
    • I1mr: I1 cache read misses
    • -
    • I2mr: L2 cache instruction read misses
    • -
    • Dr : D cache reads (ie. memory reads)
    • -
    • D1mr: D1 cache read misses
    • -
    • D2mr: L2 cache data read misses
    • -
    • Dw : D cache writes (ie. memory writes)
    • -
    • D1mw: D1 cache write misses
    • -
    • D2mw: L2 cache data write misses
    • -

    - Note that D1 total misses is given by D1mr + - D1mw, and that L2 total misses is given by - I2mr + D2mr + D2mw.

  • - -

  • Events shown: the events shown (a subset of events gathered). This can - be adjusted with the --show option.
  • - -

  • Event sort order: the sort order in which functions are shown. For - example, in this case the functions are sorted from highest - Ir counts to lowest. If two functions have identical - Ir counts, they will then be sorted by I1mr - counts, and so on. This order can be adjusted with the - --sort option.

    - - Note that this dictates the order the functions appear. It is not - the order in which the columns appear; that is dictated by the "events - shown" line (and can be changed with the --show option). -

  • - -

  • Threshold: vg_annotate by default omits functions - that cause very low numbers of misses to avoid drowning you in - information. In this case, vg_annotate shows summaries of the - functions that account for 99% of the Ir counts; - Ir is chosen as the threshold event since it is the - primary sort event. The threshold can be adjusted with the - --threshold option.
  • - -

  • Chosen for annotation: names of files specified manually for annotation; - in this case none.
  • - -

  • Auto-annotation: whether auto-annotation was requested via the - --auto=yes option. In this case no.
  • -

- -Then follow summary statistics for the whole program. These are similar -to the summary provided when running cachegrind.

- -Then follow function-by-function statistics. Each function is -identified by a file_name:function_name pair. If a column -contains only a dot it means the function never performs -that event (eg. the third row shows that strcmp() -contains no instructions that write to memory). The name -??? is used if the file name and/or function name -could not be determined from debugging information. If most of the -entries have the form ???:??? the program probably wasn't -compiled with -g. If any code was invalidated (either due to -self-modifying code or unloading of shared objects) its counts are aggregated -into a single cost centre written as (discarded):(discarded).

- -It is worth noting that functions will come from three types of source files: -

    -
  1. From the profiled program (concord.c in this example).
  2. -
  3. From libraries (eg. getc.c)
  4. -
  5. From Valgrind's implementation of some libc functions (eg. - vg_clientmalloc.c:malloc). These are recognisable because - the filename begins with vg_, and is probably one of - vg_main.c, vg_clientmalloc.c or - vg_mylibc.c. -
  6. -
- -There are two ways to annotate source files -- by choosing them -manually, or with the --auto=yes option. To do it -manually, just specify the filenames as arguments to -vg_annotate. For example, the output from running -vg_annotate concord.c for our example produces the same -output as above followed by an annotated version of -concord.c, a section of which looks like: - -
---------------------------------------------------------------------------------
--- User-annotated source: concord.c
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr      D1mr  D2mr  Dw      D1mw   D2mw
-
-[snip]
-
-        .    .    .       .     .     .       .      .      .  void init_hash_table(char *file_name, Word_Node *table[])
-        3    1    1       .     .     .       1      0      0  {
-        .    .    .       .     .     .       .      .      .      FILE *file_ptr;
-        .    .    .       .     .     .       .      .      .      Word_Info *data;
-        1    0    0       .     .     .       1      1      1      int line = 1, i;
-        .    .    .       .     .     .       .      .      .
-        5    0    0       .     .     .       3      0      0      data = (Word_Info *) create(sizeof(Word_Info));
-        .    .    .       .     .     .       .      .      .
-    4,991    0    0   1,995     0     0     998      0      0      for (i = 0; i < TABLE_SIZE; i++)
-    3,988    1    1   1,994     0     0     997     53     52          table[i] = NULL;
-        .    .    .       .     .     .       .      .      .
-        .    .    .       .     .     .       .      .      .      /* Open file, check it. */
-        6    0    0       1     0     0       4      0      0      file_ptr = fopen(file_name, "r");
-        2    0    0       1     0     0       .      .      .      if (!(file_ptr)) {
-        .    .    .       .     .     .       .      .      .          fprintf(stderr, "Couldn't open '%s'.\n", file_name);
-        1    1    1       .     .     .       .      .      .          exit(EXIT_FAILURE);
-        .    .    .       .     .     .       .      .      .      }
-        .    .    .       .     .     .       .      .      .
-  165,062    1    1  73,360     0     0  91,700      0      0      while ((line = get_word(data, line, file_ptr)) != EOF)
-  146,712    0    0  73,356     0     0  73,356      0      0          insert(data->word, data->line, table);
-        .    .    .       .     .     .       .      .      .
-        4    0    0       1     0     0       2      0      0      free(data);
-        4    0    0       1     0     0       2      0      0      fclose(file_ptr);
-        3    0    0       2     0     0       .      .      .  }
-
- -(Although column widths are automatically minimised, a wide terminal is clearly -useful.)

- -Each source file is clearly marked (User-annotated source) as -having been chosen manually for annotation. If the file was found in one of -the directories specified with the -I/--include -option, the directory and file are both given.

- -Each line is annotated with its event counts. Events not applicable for a line -are represented by a `.'; this is useful for distinguishing between an event -which cannot happen, and one which can but did not.

- -Sometimes only a small section of a source file is executed. To minimise -uninteresting output, Valgrind only shows annotated lines and lines within a -small distance of annotated lines. Gaps are marked with the line numbers so -you know which part of a file the shown code comes from, eg: - -

-(figures and code for line 704)
--- line 704 ----------------------------------------
--- line 878 ----------------------------------------
-(figures and code for line 878)
-
- -The amount of context to show around annotated lines is controlled by the ---context option.

- -To get automatic annotation, run vg_annotate --auto=yes. -vg_annotate will automatically annotate every source file it can find that is -mentioned in the function-by-function summary. Therefore, the files chosen for -auto-annotation are affected by the --sort and ---threshold options. Each source file is clearly marked -(Auto-annotated source) as being chosen automatically. Any files -that could not be found are mentioned at the end of the output, eg: - -

---------------------------------------------------------------------------------
-The following files chosen for auto-annotation could not be found:
---------------------------------------------------------------------------------
-  getc.c
-  ctype.c
-  ../sysdeps/generic/lockfile.c
-
- -This is quite common for library files, since libraries are usually compiled -with debugging information, but the source files are often not present on a -system. If a file is chosen for annotation both manually and -automatically, it is marked as User-annotated source. - -Use the -I/--include option to tell Valgrind where to look for -source files if the filenames found from the debugging information aren't -specific enough. - -Beware that vg_annotate can take some time to digest large -cachegrind.out files, eg. 30 seconds or more. Also beware that -auto-annotation can produce a lot of output if your program is large! - - -

7.7  Annotating assembler programs

- -Valgrind can annotate assembler programs too, or annotate the -assembler generated for your C program. Sometimes this is useful for -understanding what is really happening when an interesting line of C -code is translated into multiple instructions.

- -To do this, you just need to assemble your .s files with -assembler-level debug information. gcc doesn't do this, but you can -use the GNU assembler with the --gstabs option to -generate object files with this information, eg: - -

as --gstabs foo.s
- -You can then profile and annotate source files in the same way as for C/C++ -programs. - - -

7.8  vg_annotate options

-
    -
  • -h, --help
  • -

  • -v, --version

    - - Help and version, as usual.

  • - -
  • --sort=A,B,C [default: order in - cachegrind.out]

    - Specifies the events upon which the sorting of the function-by-function - entries will be based. Useful if you want to concentrate on eg. I cache - misses (--sort=I1mr,I2mr), or D cache misses - (--sort=D1mr,D2mr), or L2 misses - (--sort=D2mr,I2mr).

  • - -

  • --show=A,B,C [default: all, using order in - cachegrind.out]

    - Specifies which events to show (and the column order). Default is to use - all present in the cachegrind.out file (and use the order in - the file).

  • - -

  • --threshold=X [default: 99%]

    - Sets the threshold for the function-by-function summary. Functions are - shown that account for more than X% of the primary sort event. If - auto-annotating, also affects which files are annotated. - - Note: thresholds can be set for more than one of the events by appending - any events for the --sort option with a colon and a number - (no spaces, though). E.g. if you want to see the functions that cover - 99% of L2 read misses and 99% of L2 write misses, use this option: - -

    --sort=D2mr:99,D2mw:99
    -
  • - -

  • --auto=no [default]
    - --auto=yes

    - When enabled, automatically annotates every file that is mentioned in the - function-by-function summary that can be found. Also gives a list of - those that couldn't be found. - -

  • --context=N [default: 8]

    - Print N lines of context before and after each annotated line. Avoids - printing large sections of source files that were not executed. Use a - large number (eg. 10,000) to show all source lines. -

  • - -

  • -I=<dir>, --include=<dir> - [default: empty string]

    - Adds a directory to the list in which to search for files. Multiple - -I/--include options can be given to add multiple directories. -

- - -

7.9  Warnings

-There are a couple of situations in which vg_annotate issues warnings. - -
    -
  • If a source file is more recent than the cachegrind.out - file. This is because the information in cachegrind.out is - only recorded with line numbers, so if the line numbers change at all in - the source (eg. lines added, deleted, swapped), any annotations will be - incorrect.

    - -

  • If information is recorded about line numbers past the end of a file. - This can be caused by the above problem, ie. shortening the source file - while using an old cachegrind.out file. If this happens, - the figures for the bogus lines are printed anyway (clearly marked as - bogus) in case they are important.
  • -

- - -

7.10  Things to watch out for

-Some odd things that can occur during annotation: - -
    -
  • If annotating at the assembler level, you might see something like this: - -
    -      1    0    0  .    .    .  .    .    .          leal -12(%ebp),%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,84(%ebx)
    -      2    0    0  0    0    0  1    0    0          movl $1,-20(%ebp)
    -      .    .    .  .    .    .  .    .    .          .align 4,0x90
    -      1    0    0  .    .    .  .    .    .          movl $.LnrB,%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,-16(%ebp)
    -      
    - - How can the third instruction be executed twice when the others are - executed only once? As it turns out, it isn't. Here's a dump of the - executable, using objdump -d: - -
    -      8048f25:       8d 45 f4                lea    0xfffffff4(%ebp),%eax
    -      8048f28:       89 43 54                mov    %eax,0x54(%ebx)
    -      8048f2b:       c7 45 ec 01 00 00 00    movl   $0x1,0xffffffec(%ebp)
    -      8048f32:       89 f6                   mov    %esi,%esi
    -      8048f34:       b8 08 8b 07 08          mov    $0x8078b08,%eax
    -      8048f39:       89 45 f0                mov    %eax,0xfffffff0(%ebp)
    -      
    - - Notice the extra mov %esi,%esi instruction. Where did this - come from? The GNU assembler inserted it to serve as the two bytes of - padding needed to align the movl $.LnrB,%eax instruction on - a four-byte boundary, but pretended it didn't exist when adding debug - information. Thus when Valgrind reads the debug info it thinks that the - movl $0x1,0xffffffec(%ebp) instruction covers the address - range 0x8048f2b--0x8048f33 by itself, and attributes the counts for the - mov %esi,%esi to it.

    -

  • - -
  • Inlined functions can cause strange results in the function-by-function - summary. If a function inline_me() is defined in - foo.h and inlined in the functions f1(), - f2() and f3() in bar.c, there will - not be a foo.h:inline_me() function entry. Instead, there - will be separate function entries for each inlining site, ie. - foo.h:f1(), foo.h:f2() and - foo.h:f3(). To find the total counts for - foo.h:inline_me(), add up the counts from each entry.

    - - The reason for this is that although the debug info output by gcc - indicates the switch from bar.c to foo.h, it - doesn't indicate the name of the function in foo.h, so - Valgrind keeps using the old one.

    - -

  • Sometimes, the same filename might be represented with a relative name - and with an absolute name in different parts of the debug info, eg: - /home/user/proj/proj.h and ../proj.h. In this - case, if you use auto-annotation, the file will be annotated twice with - the counts split between the two.

    -

  • - -
  • Files with more than 65,535 lines cause difficulties for the stabs debug - info reader. This is because the line number in the struct - nlist defined in a.out.h under Linux is only a 16-bit - value. Valgrind can handle some files with more than 65,535 lines - correctly by making some guesses to identify line number overflows. But - some cases are beyond it, in which case you'll get a warning message - explaining that annotations for the file might be incorrect.

    -

  • - -
  • If you compile some files with -g and some without, some - events that take place in a file without debug info could be attributed - to the last line of a file with debug info (whichever one gets placed - before the non-debug-info file in the executable).

    -

  • -
- -This list looks long, but these cases should be fairly rare.

- -Note: stabs is not an easy format to read. If you come across bizarre -annotations that look like they might be caused by a bug in the stabs reader, -please let us know.

- - -

7.11  Accuracy

-Valgrind's cache profiling has a number of shortcomings: - -
    -
  • It doesn't account for kernel activity -- the effect of system calls on - the cache contents is ignored.
  • - -

  • It doesn't account for other process activity (although this is probably - desirable when considering a single program).
  • - -

  • It doesn't account for virtual-to-physical address mappings; hence the - entire simulation is not a true representation of what's happening in the - cache.
  • - -

  • It doesn't account for cache misses not visible at the instruction level, - eg. those arising from TLB misses, or speculative execution.
  • - -

  • Valgrind's custom malloc() will allocate memory in different - ways to the standard malloc(), which could warp the results. -
  • - -

  • Valgrind's custom threads implementation will schedule threads - differently to the standard one. This too could warp the results for - threaded programs. -
  • - -

  • The instructions bts, btr and btc - will incorrectly be counted as doing a data read if both the arguments - are registers, eg: - -
    btsl %eax, %edx
    - - This should only happen rarely. -
- -Another thing worth noting is that results are very sensitive. Changing the -size of the valgrind.so file, the size of the program being -profiled, or even the length of its name can perturb the results. Variations -will be small, but don't expect perfectly repeatable results if your program -changes at all.

- -While these factors mean you shouldn't trust the results to be super-accurate, -hopefully they should be close enough to be useful.

- - -

7.12  Todo

-
    -
  • Program start-up/shut-down calls a lot of functions that aren't - interesting and just complicate the output. Would be nice to exclude - these somehow.
  • -

    -

-
- - - diff --git a/coregrind/docs/nav.html b/coregrind/docs/nav.html deleted file mode 100644 index ad920ad443..0000000000 --- a/coregrind/docs/nav.html +++ /dev/null @@ -1,72 +0,0 @@ - - - Valgrind - - - - - -
- Contents of this manual
- 1 Introduction
- 1.1 What Valgrind is for
- 1.2 What it does with - your program -

- 2 How to use it, and how to - make sense of the results
- 2.1 Getting started
- 2.2 The commentary
- 2.3 Reporting of errors
- 2.4 Suppressing errors
- 2.5 Command-line flags
- 2.6 Explanation of error messages
- 2.7 Writing suppressions files
- 2.8 The Client Request mechanism
- 2.9 Support for POSIX pthreads
- 2.10 Building and installing
- 2.11 If you have problems -

- 3 Details of the checking machinery
- 3.1 Valid-value (V) bits
- 3.2 Valid-address (A) bits
- 3.3 Putting it all together
- 3.4 Signals
- 3.5 Memory leak detection -

- 4 Limitations
-

- 5 How it works -- a rough overview
- 5.1 Getting started
- 5.2 The translation/instrumentation engine
- 5.3 Tracking the status of memory
- 5.4 System calls
- 5.5 Signals -

- 6 An example
-

- 7 Cache profiling -

- 8 The design and implementation of Valgrind
- - - diff --git a/coregrind/docs/techdocs.html b/coregrind/docs/techdocs.html deleted file mode 100644 index 2e1cc8b7e9..0000000000 --- a/coregrind/docs/techdocs.html +++ /dev/null @@ -1,2524 +0,0 @@ - - - - The design and implementation of Valgrind - - - - -  -

The design and implementation of Valgrind

- -
-Detailed technical notes for hackers, maintainers and the -overly-curious
-These notes pertain to snapshot 20020306
-

-jseward@acm.org
-
http://developer.kde.org/~sewardj
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -x86 GNU/Linux executables. -

- -

- - - - -


- -

Introduction

- -This document contains a detailed, highly-technical description of the -internals of Valgrind. This is not the user manual; if you are an -end-user of Valgrind, you do not want to read this. Conversely, if -you really are a hacker-type and want to know how it works, I assume -that you have read the user manual thoroughly. -

-You may need to read this document several times, and carefully. Some -important things, I only say once. - - -

History

- -Valgrind came into public view in late Feb 2002. However, it has been -under contemplation for a very long time, perhaps seriously for about -five years. Somewhat over two years ago, I started working on the x86 -code generator for the Glasgow Haskell Compiler -(http://www.haskell.org/ghc), gaining familiarity with x86 internals -on the way. I then did Cacheprof (http://www.cacheprof.org), gaining -further x86 experience. Some time around Feb 2000 I started -experimenting with a user-space x86 interpreter for x86-Linux. This -worked, but it was clear that a JIT-based scheme would be necessary to -give reasonable performance for Valgrind. Design work for the JITter -started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 -dynamic translator which could run quite large programs. This -translator was in a sense pointless, since it did not do any -instrumentation or checking. - -

-Most of the rest of 2001 was taken up designing and implementing the -instrumentation scheme. The main difficulty, which consumed a lot -of effort, was to design a scheme which did not generate large numbers -of false uninitialised-value warnings. By late 2001 a satisfactory -scheme had been arrived at, and I started to test it on ever-larger -programs, with an eventual eye to making it work well enough so that -it was helpful to folks debugging the upcoming version 3 of KDE. I've -used KDE since before version 1.0, and wanted Valgrind to be an -indirect contribution to the KDE 3 development effort. At the start of -Feb 02 the kde-core-devel crew started using it, and gave a huge -amount of helpful feedback and patches in the space of three weeks. -Snapshot 20020306 is the result. - -

-In the best Unix tradition, or perhaps in the spirit of Fred Brooks' -depressing-but-completely-accurate epitaph "build one to throw away; -you will anyway", much of Valgrind is a second or third rendition of -the initial idea. The instrumentation machinery -(vg_translate.c, vg_memory.c) and core CPU -simulation (vg_to_ucode.c, vg_from_ucode.c) -have had three redesigns and rewrites; the register allocator, -low-level memory manager (vg_malloc2.c) and symbol table -reader (vg_symtab2.c) are on the second rewrite. In a -sense, this document serves to record some of the knowledge gained as -a result. - - -

Design overview

- -Valgrind is compiled into a Linux shared object, -valgrind.so, and also a dummy one, -valgrinq.so, of which more later. The -valgrind shell script adds valgrind.so to -the LD_PRELOAD list of extra libraries to be -loaded with any dynamically linked library. This is a standard trick, -one which I assume the LD_PRELOAD mechanism was developed -to support. - -

-valgrind.so -is linked with the -z initfirst flag, which requests that -its initialisation code is run before that of any other object in the -executable image. When this happens, valgrind gains control. The -real CPU becomes "trapped" in valgrind.so and the -translations it generates. The synthetic CPU provided by Valgrind -does, however, return from this initialisation function. So the -normal startup actions, orchestrated by the dynamic linker -ld.so, continue as usual, except on the synthetic CPU, -not the real one. Eventually main is run and returns, -and then the finalisation code of the shared objects is run, -presumably in inverse order to which they were initialised. Remember, -this is still all happening on the simulated CPU. Eventually -valgrind.so's own finalisation code is called. It spots -this event, shuts down the simulated CPU, prints any error summaries -and/or does leak detection, and returns from the initialisation code -on the real CPU. At this point, in effect the real and synthetic CPUs -have merged back into one, Valgrind has lost control of the program, -and the program finally exit()s back to the kernel in the -usual way. - -

-The normal course of activity, once Valgrind has started up, is as -follows. Valgrind never runs any part of your program (usually -referred to as the "client"), not a single byte of it, directly. -Instead it uses function VG_(translate) to translate -basic blocks (BBs, straight-line sequences of code) into instrumented -translations, and those are run instead. The translations are stored -in the translation cache (TC), vg_tc, with the -translation table (TT), vg_tt supplying the -original-to-translation code address mapping. Auxiliary array -VG_(tt_fast) is used as a direct-map cache for fast -lookups in TT; it usually achieves a hit rate of around 98% and -facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. - -

-Function VG_(dispatch) in vg_dispatch.S is -the heart of the JIT dispatcher. Once a translated code address has -been found, it is executed simply by an x86 call -to the translation. At the end of the translation, the next -original code addr is loaded into %eax, and the -translation then does a ret, taking it back to the -dispatch loop, with, interestingly, zero branch mispredictions. -The address requested in %eax is looked up first in -VG_(tt_fast), and, if not found, by calling C helper -VG_(search_transtab). If there is still no translation -available, VG_(dispatch) exits back to the top-level -C dispatcher VG_(toploop), which arranges for -VG_(translate) to make a new translation. All fairly -unsurprising, really. There are various complexities described below. - -

-The translator, orchestrated by VG_(translate), is -complicated but entirely self-contained. It is described in great -detail in subsequent sections. Translations are stored in TC, with TT -tracking administrative information. The translations are subject to -an approximate LRU-based management scheme. With the current -settings, the TC can hold at most about 15MB of translations, and LRU -passes prune it to about 13.5MB. Given that the -orig-to-translation expansion ratio is about 13:1 to 14:1, this means -TC holds translations for more or less a megabyte of original code, -which generally comes to about 70000 basic blocks for C++ compiled -with optimisation on. Generating new translations is expensive, so it -is worth having a large TC to minimise the (capacity) miss rate. - -

-The dispatcher, VG_(dispatch), receives hints from -the translations which allow it to cheaply spot all control -transfers corresponding to x86 call and ret -instructions. It has to do this in order to spot some special events: -

    -
  • Calls to VG_(shutdown). This is Valgrind's cue to - exit. NOTE: actually this is done a different way; it should be - cleaned up. -

    -

  • Returns of signal handlers, to the return address - VG_(signalreturn_bogusRA). The signal simulator - needs to know when a signal handler is returning, so we spot - jumps (returns) to this address. -

    -

  • Calls to vg_trap_here. All malloc, - free, etc calls that the client program makes are - eventually routed to a call to vg_trap_here, - and Valgrind does its own special thing with these calls. - In effect this provides a trapdoor, by which Valgrind can - intercept certain calls on the simulated CPU, run the call as it - sees fit itself (on the real CPU), and return the result to - the simulated CPU, quite transparently to the client program. -
-Valgrind intercepts the client's malloc, -free, etc, -calls, so that it can store additional information. Each block -malloc'd by the client gives rise to a shadow block -in which Valgrind stores the call stack at the time of the -malloc -call. When the client calls free, Valgrind tries to -find the shadow block corresponding to the address passed to -free, and emits an error message if none can be found. -If it is found, the block is placed on the freed blocks queue -vg_freed_list, it is marked as inaccessible, and -its shadow block now records the call stack at the time of the -free call. Keeping free'd blocks in -this queue allows Valgrind to spot all (presumably invalid) accesses -to them. However, once the volume of blocks in the free queue -exceeds VG_(clo_freelist_vol), blocks are finally -removed from the queue. - -

-Keeping track of A and V bits (note: if you don't know what these are, -you haven't read the user guide carefully enough) for memory is done -in vg_memory.c. This implements a sparse array structure -which covers the entire 4G address space in a way which is reasonably -fast and reasonably space efficient. The 4G address space is divided -up into 64K sections, each covering 64KB of address space. Given a -32-bit address, the top 16 bits are used to select one of the 65536 -entries in VG_(primary_map). The resulting "secondary" -(SecMap) holds A and V bits for the 64k chunk of -address space corresponding to the lower 16 bits of the address. - - -

Design decisions

- -Some design decisions were motivated by the need to make Valgrind -debuggable. Imagine you are writing a CPU simulator. It works fairly -well. However, you run some large program, like Netscape, and after -tens of millions of instructions, it crashes. How can you figure out -where in your simulator the bug is? - -

-Valgrind's answer is: cheat. Valgrind is designed so that it is -possible to switch back to running the client program on the real -CPU at any point. Using the --stop-after= flag, you can -ask Valgrind to run just some number of basic blocks, and then -run the rest of the way on the real CPU. If you are searching for -a bug in the simulated CPU, you can use this to do a binary search, -which quickly leads you to the specific basic block which is -causing the problem. - -

-This is all very handy. It does constrain the design in certain -unimportant ways. Firstly, the layout of memory, when viewed from the -client's point of view, must be identical regardless of whether it is -running on the real or simulated CPU. This means that Valgrind can't -do pointer swizzling -- well, no great loss -- and it can't run on -the same stack as the client -- again, no great loss. -Valgrind operates on its own stack, VG_(stack), which -it switches to at startup, temporarily switching back to the client's -stack when doing system calls for the client. - -

-Valgrind also receives signals on its own stack, -VG_(sigstack), but for different gruesome reasons -discussed below. - -

-This nice clean switch-back-to-the-real-CPU-whenever-you-like story -is muddied by signals. Problem is that signals arrive at arbitrary -times and tend to slightly perturb the basic block count, with the -result that you can get close to the basic block causing a problem but -can't home in on it exactly. My kludgey hack is to define -SIGNAL_SIMULATION to 1 towards the bottom of -vg_syscall_mem.c, so that signal handlers are run on the -real CPU and don't change the BB counts. - -

-A second hole in the switch-back-to-real-CPU story is that Valgrind's -way of delivering signals to the client is different from that of the -kernel. Specifically, the layout of the signal delivery frame, and -the mechanism used to detect a sighandler returning, are different. -So you can't expect to make the transition inside a sighandler and -still have things working, but in practice that's not much of a -restriction. - -

-Valgrind's implementation of malloc, free, -etc, (in vg_clientmalloc.c, not the low-level stuff in -vg_malloc2.c) is somewhat complicated by the need to -handle switching back at arbitrary points. It does work tho. - - - -

Correctness

- -There's only one of me, and I have a Real Life (tm) as well as hacking -Valgrind [allegedly :-]. That means I don't have time to waste -chasing endless bugs in Valgrind. My emphasis is therefore on doing -everything as simply as possible, with correctness, stability and -robustness being the number one priority, more important than -performance or functionality. As a result: -
    -
  • The code is absolutely loaded with assertions, and these are - permanently enabled. I have no plan to remove or disable - them later. Over the past couple of months, as valgrind has - become more widely used, they have shown their worth, pulling - up various bugs which would otherwise have appeared as - hard-to-find segmentation faults. -

    - I am of the view that it's acceptable to spend 5% of the total - running time of your valgrindified program doing assertion checks - and other internal sanity checks. -

    -

  • Aside from the assertions, valgrind contains various sets of - internal sanity checks, which get run at varying frequencies - during normal operation. VG_(do_sanity_checks) - runs every 1000 basic blocks, which means 500 to 2000 times/second - for typical machines at present. It checks that Valgrind hasn't - overrun its private stack, and does some simple checks on the - memory permissions maps. Once every 25 calls it does some more - extensive checks on those maps. Etc, etc. -

    - The following components also have sanity check code, which can - be enabled to aid debugging: -

      -
    • The low-level memory-manager - (VG_(mallocSanityCheckArena)). This does a - complete check of all blocks and chains in an arena, which - is very slow. Is not engaged by default. -

      -

    • The symbol table reader(s): various checks to ensure - uniqueness of mappings; see VG_(read_symbols) - for a start. Is permanently engaged. -

      -

    • The A and V bit tracking stuff in vg_memory.c. - This can be compiled with cpp symbol - VG_DEBUG_MEMORY defined, which removes all the - fast, optimised cases, and uses simple-but-slow fallbacks - instead. Not engaged by default. -

      -

    • Ditto VG_DEBUG_LEAKCHECK. -

      -

    • The JITter parses x86 basic blocks into sequences of - UCode instructions. It then sanity checks each one with - VG_(saneUInstr) and sanity checks the sequence - as a whole with VG_(saneUCodeBlock). This stuff - is engaged by default, and has caught some way-obscure bugs - in the simulated CPU machinery in its time. -

      -

    • The system call wrapper does - VG_(first_and_last_secondaries_look_plausible) after - every syscall; this is known to pick up bugs in the syscall - wrappers. Engaged by default. -

      -

    • The main dispatch loop, in VG_(dispatch), checks - that translations do not set %ebp to any value - different from VG_EBP_DISPATCH_CHECKED or - & VG_(baseBlock). In effect this test is free, - and is permanently engaged. -

      -

    • There are a couple of ifdefed-out consistency checks I - inserted whilst debugging the new register allocator, - vg_do_register_allocation. -
    -

    -

  • I try to avoid techniques, algorithms, mechanisms, etc, for which - I can supply neither a convincing argument that they are correct, - nor sanity-check code which might pick up bugs in my - implementation. I don't always succeed in this, but I try. - Basically the idea is: avoid techniques which are, in practice, - unverifiable, in some sense. When doing anything, always have in - mind: "how can I verify that this is correct?" -
- -

-Some more specific things are: - -

    -
  • Valgrind runs in the same namespace as the client, at least from - ld.so's point of view, and it therefore absolutely - had better not export any symbol with a name which could clash - with that of the client or any of its libraries. Therefore, all - globally visible symbols exported from valgrind.so - are defined using the VG_ CPP macro. As you'll see - from vg_constants.h, this appends some arbitrary - prefix to the symbol, in order that it be, we hope, globally - unique. Currently the prefix is vgPlain_. For - convenience there are also VGM_, VGP_ - and VGOFF_. All locally defined symbols are declared - static and do not appear in the final shared object. -

    - To check this, I periodically do - nm valgrind.so | grep " T ", - which shows you all the globally exported text symbols. - They should all have an approved prefix, except for those like - malloc, free, etc, which we deliberately - want to shadow and take precedence over the same names exported - from glibc.so, so that valgrind can intercept those - calls easily. Similarly, nm valgrind.so | grep " D " - allows you to find any rogue data-segment symbol names. -

    -

  • Valgrind tries, and almost succeeds, in being completely - independent of all other shared objects, in particular of - glibc.so. For example, we have our own low-level - memory manager in vg_malloc2.c, which is a fairly - standard malloc/free scheme augmented with arenas, and - vg_mylibc.c exports reimplementations of various bits - and pieces you'd normally get from the C library. -

    - Why all the hassle? Because imagine the potential chaos of both - the simulated and real CPUs executing in glibc.so. - It just seems simpler and cleaner to be completely self-contained, - so that only the simulated CPU visits glibc.so. In - practice it's not much hassle anyway. Also, valgrind starts up - before glibc has a chance to initialise itself, and who knows what - difficulties that could lead to. Finally, glibc has definitions - for some types, specifically sigset_t, which conflict with - (are different from) the Linux kernel's idea of same. When - Valgrind wants to fiddle around with signal stuff, it wants to - use the kernel's definitions, not glibc's definitions. So it's - simplest just to keep glibc out of the picture entirely. -

    - To find out which glibc symbols are used by Valgrind, reinstate - the link flags -nostdlib -Wl,-no-undefined. This - causes linking to fail, but will tell you what you depend on. - I have mostly, but not entirely, got rid of the glibc - dependencies; what remains is, IMO, fairly harmless. AFAIK the - current dependencies are: memset, - memcmp, stat, system, - sbrk, setjmp and longjmp. - -

    -

  • Similarly, valgrind should not really import any headers other - than the Linux kernel headers, since it knows of no API other than - the kernel interface to talk to. At the moment this is really not - in a good state, and vg_syscall_mem imports, via - vg_unsafe.h, a significant number of C-library - headers so as to know the sizes of various structs passed across - the kernel boundary. This is of course completely bogus, since - there is no guarantee that the C library's definitions of these - structs matches those of the kernel. I have started to sort this - out using vg_kerneliface.h, into which I had intended - to copy all kernel definitions which valgrind could need, but this - has not gotten very far. At the moment it mostly contains - definitions for sigset_t and struct - sigaction, since the kernel's definition for these really - does clash with glibc's. I plan to use a vki_ prefix - on all these types and constants, to denote the fact that they - pertain to Valgrind's Kernel Interface. -

    - Another advantage of having a vg_kerneliface.h file - is that it makes it simpler to interface to a different kernel. - One can, for example, easily imagine writing a new - vg_kerneliface.h for FreeBSD, or x86 NetBSD. - -

- -

Current limitations

- -No threads. I think fixing this is close to a research-grade problem. -

-No MMX. Fixing this should be relatively easy, using the same giant -trick used for x86 FPU instructions. See below. -

-Support for weird (non-POSIX) signal stuff is patchy. Does anybody -care? -

- - - - -


- -

The instrumenting JITter

- -This really is the heart of the matter. We begin with various side -issues. - -

Run-time storage, and the use of host registers

- -Valgrind translates client (original) basic blocks into instrumented -basic blocks, which live in the translation cache TC, until either the -client finishes or the translations are ejected from TC to make room -for newer ones. -

-Since it generates x86 code in memory, Valgrind has complete control -of the use of registers in the translations. Now pay attention. I -shall say this only once, and it is important you understand this. In -what follows I will refer to registers in the host (real) cpu using -their standard names, %eax, %edi, etc. I -refer to registers in the simulated CPU by capitalising them: -%EAX, %EDI, etc. These two sets of -registers usually bear no direct relationship to each other; there is -no fixed mapping between them. This naming scheme is used fairly -consistently in the comments in the sources. -

-Host registers, once things are up and running, are used as follows: -

    -
  • %esp, the real stack pointer, points - somewhere in Valgrind's private stack area, - VG_(stack) or, transiently, into its signal delivery - stack, VG_(sigstack). -

    -

  • %edi is used as a temporary in code generation; it - is almost always dead, except when used for the Left - value-tag operations. -

    -

  • %eax, %ebx, %ecx, - %edx and %esi are available to - Valgrind's register allocator. They are dead (carry unimportant - values) in between translations, and are live only in - translations. The one exception to this is %eax, - which, as mentioned far above, has a special significance to the - dispatch loop VG_(dispatch): when a translation - returns to the dispatch loop, %eax is expected to - contain the original-code-address of the next translation to run. - The register allocator is so good at minimising spill code that - using five regs and not having to save/restore %edi - actually gives better code than allocating to %edi - as well, but then having to push/pop it around special uses. -

    -

  • %ebp points permanently at - VG_(baseBlock). Valgrind's translations are - position-independent, partly because this is convenient, but also - because translations get moved around in TC as part of the LRUing - activity. All static entities which need to be referred to - from generated code, whether data or helper functions, are stored - starting at VG_(baseBlock) and are therefore reached - by indexing from %ebp. There is but one exception, - which is that by placing the value - VG_EBP_DISPATCH_CHECKED - in %ebp just before a return to the dispatcher, - the dispatcher is informed that the next address to run, - in %eax, requires special treatment. -

    -

  • The real machine's FPU state is pretty much unimportant, for - reasons which will become obvious. Ditto its %eflags - register. -
- -

-The state of the simulated CPU is stored in memory, in -VG_(baseBlock), which is a block of 200 words IIRC. -Recall that %ebp points permanently at the start of this -block. Function vg_init_baseBlock decides what the -offsets of various entities in VG_(baseBlock) are to be, -and allocates word offsets for them. The code generator then emits -%ebp relative addresses to get at those things. The -sequence in which entities are allocated has been carefully chosen so -that the 32 most popular entities come first, because this means 8-bit -offsets can be used in the generated code. - -

-If I was clever, I could make %ebp point 32 words along -VG_(baseBlock), so that I'd have another 32 words of -short-form offsets available, but that's just complicated, and it's -not important -- the first 32 words take 99% (or whatever) of the -traffic. - -

-Currently, the sequence of stuff in VG_(baseBlock) is as -follows: -

    -
  • 9 words, holding the simulated integer registers, - %EAX .. %EDI, and the simulated flags, - %EFLAGS. -

    -

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. -

    -

  • The addresses of various helper routines called from - generated code: - VG_(helper_value_check4_fail), - VG_(helper_value_check0_fail), - which register V-check failures, - VG_(helperc_STOREV4), - VG_(helperc_STOREV1), - VG_(helperc_LOADV4), - VG_(helperc_LOADV1), - which do stores and loads of V bits to/from the - sparse array which keeps track of V bits in memory, - and - VGM_(handle_esp_assignment), which messes with - memory addressability resulting from changes in %ESP. -

    -

  • The simulated %EIP. -

    -

  • 24 spill words, for when the register allocator can't make it work - with 5 measly registers. -

    -

  • Addresses of helpers VG_(helperc_STOREV2), - VG_(helperc_LOADV2). These are here because 2-byte - loads and stores are relatively rare, so are placed above the - magic 32-word offset boundary. -

    -

  • For similar reasons, addresses of helper functions - VGM_(fpu_write_check) and - VGM_(fpu_read_check), which handle the A/V maps - testing and changes required by FPU writes/reads. -

    -

  • Some other boring helper addresses: - VG_(helper_value_check2_fail) and - VG_(helper_value_check1_fail). These are probably - never emitted now, and should be removed. -

    -

  • The entire state of the simulated FPU, which I believe to be - 108 bytes long. -

    -

  • Finally, the addresses of various other helper functions in - vg_helpers.S, which deal with rare situations which - are tedious or difficult to generate code in-line for. -
- -

-As a general rule, the simulated machine's state lives permanently in -memory at VG_(baseBlock). However, the JITter does some -optimisations which allow the simulated integer registers to be -cached in real registers over multiple simulated instructions within -the same basic block. These are always flushed back into memory at -the end of every basic block, so that the in-memory state is -up-to-date between basic blocks. (This flushing is implied by the -statement above that the real machine's allocatable registers are -dead in between simulated blocks). - - -

Startup, shutdown, and system calls

- -Getting into Valgrind (VG_(startup), called from -valgrind.so's initialisation section), really means -copying the real CPU's state into VG_(baseBlock), and -then installing our own stack pointer, etc, into the real CPU, and -then starting up the JITter. Exiting valgrind involves copying the -simulated state back to the real state. - -

-Unfortunately, there's a complication at startup time. Problem is -that at the point where we need to take a snapshot of the real CPU's -state, the offsets in VG_(baseBlock) are not set up yet, -because to do so would involve disrupting the real machine's state -significantly. The way round this is to dump the real machine's state -into a temporary, static block of memory, -VG_(m_state_static). We can then set up the -VG_(baseBlock) offsets at our leisure, and copy into it -from VG_(m_state_static) at some convenient later time. -This copying is done by -VG_(copy_m_state_static_to_baseBlock). - -

-On exit, the inverse transformation is (rather unnecessarily) used: -stuff in VG_(baseBlock) is copied to -VG_(m_state_static), and the assembly stub then copies -from VG_(m_state_static) into the real machine registers. - -

-Doing system calls on behalf of the client (vg_syscall.S) -is something of a half-way house. We have to make the world look -sufficiently like that which the client would normally have to make -the syscall actually work properly, but we can't afford to lose -control. So the trick is to copy all of the client's state, except -its program counter, into the real CPU, do the system call, and -copy the state back out. Note that the client's state includes its -stack pointer register, so one effect of this partial restoration is -to cause the system call to be run on the client's stack, as it should -be. - -

-As ever there are complications. We have to save some of our own state -somewhere when restoring the client's state into the CPU, so that we -can keep going sensibly afterwards. In fact the only thing which is -important is our own stack pointer, but for paranoia reasons I save -and restore our own FPU state as well, even though that's probably -pointless. - -

-The complication on the above complication is, that for horrible -reasons to do with signals, we may have to handle a second client -system call whilst the client is blocked inside some other system -call (unbelievable!). That means there's two sets of places to -dump Valgrind's stack pointer and FPU state across the syscall, -and we decide which to use by consulting -VG_(syscall_depth), which is in turn maintained by -VG_(wrap_syscall). - - - -

Introduction to UCode

- -UCode lies at the heart of the x86-to-x86 JITter. The basic premise -is that dealing with the x86 instruction set head-on is just too darn -complicated, so we do the traditional compiler-writer's trick and -translate it into a simpler, easier-to-deal-with form. - -

-In normal operation, translation proceeds through six stages, -coordinated by VG_(translate): -

    -
  1. Parsing of an x86 basic block into a sequence of UCode - instructions (VG_(disBB)). -

    -

  2. UCode optimisation (vg_improve), with the aim of - caching simulated registers in real registers over multiple - simulated instructions, and removing redundant simulated - %EFLAGS saving/restoring. -

    -

  3. UCode instrumentation (vg_instrument), which adds - value and address checking code. -

    -

  4. Post-instrumentation cleanup (vg_cleanup), removing - redundant value-check computations. -

    -

  5. Register allocation (vg_do_register_allocation), - which, note, is done on UCode. -

    -

  6. Emission of final instrumented x86 code - (VG_(emit_code)). -
- -

-Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode -transformation passes, all on straight-line blocks of UCode (type -UCodeBlock). Steps 2 and 4 are optimisation passes and -can be disabled for debugging purposes, with ---optimise=no and --cleanup=no respectively. - -

-Valgrind can also run in a no-instrumentation mode, given ---instrument=no. This is useful for debugging the JITter -quickly without having to deal with the complexity of the -instrumentation mechanism too. In this mode, steps 3 and 4 are -omitted. - -

-These flags combine, so that --instrument=no together with ---optimise=no means only steps 1, 5 and 6 are used. ---single-step=yes causes each x86 instruction to be -treated as a single basic block. The translations are terrible but -this is sometimes instructive. - -

-The --stop-after=N flag switches back to the real CPU -after N basic blocks. It also re-JITs the final basic -block executed and prints the debugging info resulting, so this -gives you a way to get a quick snapshot of how a basic block looks as -it passes through the six stages mentioned above. If you want to -see full information for every block translated (probably not, but -still ...) find, in VG_(translate), the lines -
dis = True; -
dis = debugging_translation; -
-and comment out the second line. This will spew out debugging -junk faster than you can possibly imagine. - - - -

UCode operand tags: type Tag

- -UCode is, more or less, a simple two-address RISC-like code. In -keeping with the x86 AT&T assembly syntax, generally speaking the -first operand is the source operand, and the second is the destination -operand, which is modified when the uinstr is notionally executed. - -

-UCode instructions have up to three operand fields, each of which has -a corresponding Tag describing it. Possible values for -the tag are: - -

    -
  • NoValue: indicates that the field is not in use. -

    -

  • Lit16: the field contains a 16-bit literal. -

    -

  • Literal: the field denotes a 32-bit literal, whose - value is stored in the lit32 field of the uinstr - itself. Since there is only one lit32 for the whole - uinstr, only one operand field may contain this tag. -

    -

  • SpillNo: the field contains a spill slot number, in - the range 0 to 23 inclusive, denoting one of the spill slots - contained inside VG_(baseBlock). Such tags only - exist after register allocation. -

    -

  • RealReg: the field contains a number in the range 0 - to 7 denoting an integer x86 ("real") register on the host. The - number is the Intel encoding for integer registers. Such tags - only exist after register allocation. -

    -

  • ArchReg: the field contains a number in the range 0 - to 7 denoting an integer x86 register on the simulated CPU. In - reality this means a reference to one of the first 8 words of - VG_(baseBlock). Such tags can exist at any point in - the translation process. -

    -

  • Last, but not least, TempReg. The field contains the - number of one of an infinite set of virtual (integer) - registers. TempRegs are used everywhere throughout - the translation process; you can have as many as you want. The - register allocator maps as many as it can into - RealRegs and turns the rest into - SpillNos, so TempRegs should not exist - after the register allocation phase. -

    - TempRegs are always 32 bits long, even if the data - they hold is logically shorter. In that case the upper unused - bits are required, and, I think, generally assumed, to be zero. - TempRegs holding V bits for quantities shorter than - 32 bits are expected to have ones in the unused places, since a - one denotes "undefined". -

- - -

UCode instructions: type UInstr

- -

-UCode was carefully designed to make it possible to do register -allocation on UCode and then translate the result into x86 code -without needing any extra registers ... well, that was the original -plan, anyway. Things have gotten a little more complicated since -then. In what follows, UCode instructions are referred to as uinstrs, -to distinguish them from x86 instructions. Uinstrs of course have -uopcodes which are (naturally) different from x86 opcodes. - -

-A uinstr (type UInstr) contains -various fields, not all of which are used by any one uopcode: -

    -
  • Three 16-bit operand fields, val1, val2 - and val3. -

    -

  • Three tag fields, tag1, tag2 - and tag3. Each of these has a value of type - Tag, - and they describe what the val1, val2 - and val3 fields contain. -

    -

  • A 32-bit literal field. -

    -

  • Two FlagSets, specifying which x86 condition codes are - read and written by the uinstr. -

    -

  • An opcode byte, containing a value of type Opcode. -

    -

  • A size field, indicating the data transfer size (1/2/4/8/10) in - cases where this makes sense, or zero otherwise. -

    -

  • A condition-code field, which, for jumps, holds a - value of type Condcode, indicating the condition - which applies. The encoding is as it is in the x86 insn stream, - except we add a 17th value CondAlways to indicate - an unconditional transfer. -

    -

  • Various 1-bit flags, indicating whether this insn pertains to an - x86 CALL or RET instruction, whether a widening is signed or not, - etc. -
- -

-UOpcodes (type Opcode) are divided into two groups: those -necessary merely to express the functionality of the x86 code, and -extra uopcodes needed to express the instrumentation. The former -group contains: -

    -
  • GET and PUT, which move values from the - simulated CPU's integer registers (ArchRegs) into - TempRegs, and back. GETF and - PUTF do the corresponding thing for the simulated - %EFLAGS. There are no corresponding insns for the - FPU register stack, since we don't explicitly simulate its - registers. -

    -

  • LOAD and STORE, which, in RISC-like - fashion, are the only uinstrs able to interact with memory. -

    -

  • MOV and CMOV allow unconditional and - conditional moves of values between TempRegs. -

    -

  • ALU operations. Again in RISC-like fashion, these only operate on - TempRegs (before reg-alloc) or RealRegs - (after reg-alloc). These are: ADD, ADC, - AND, OR, XOR, - SUB, SBB, SHL, - SHR, SAR, ROL, - ROR, RCL, RCR, - NOT, NEG, INC, - DEC, BSWAP, CC2VAL and - WIDEN. WIDEN does signed or unsigned - value widening. CC2VAL is used to convert condition - codes into a value, zero or one. The rest are obvious. -

    - To allow for more efficient code generation, we bend slightly the - restriction at the start of the previous para: for - ADD, ADC, XOR, - SUB and SBB, we allow the first (source) - operand to also be an ArchReg, that is, one of the - simulated machine's registers. Also, many of these ALU ops allow - the source operand to be a literal. See - VG_(saneUInstr) for the final word on the allowable - forms of uinstrs. -

    -

  • LEA1 and LEA2 are not strictly - necessary, but facilitate better translations. They - record the fancy x86 addressing modes in a direct way, which - allows those amodes to be emitted back into the final - instruction stream more or less verbatim. -

    -

  • CALLM calls a machine-code helper, one of the methods - whose address is stored at some VG_(baseBlock) - offset. PUSH and POP move values - to/from TempReg to the real (Valgrind's) stack, and - CLEAR removes values from the stack. - CALLM_S and CALLM_E delimit the - boundaries of call setups and clearings, for the benefit of the - instrumentation passes. Getting this right is critical, and so - VG_(saneUCodeBlock) makes various checks on the use - of these uopcodes. -

    - It is important to understand that these uopcodes have nothing to - do with the x86 call, return, - push or pop instructions, and are not - used to implement them. Those guys turn into combinations of - GET, PUT, LOAD, - STORE, ADD, SUB, and - JMP. What these uopcodes support is calling of - helper functions such as VG_(helper_imul_32_64), - which do stuff which is too difficult or tedious to emit inline. -

    -

  • FPU, FPU_R and FPU_W. - Valgrind doesn't attempt to simulate the internal state of the - FPU at all. Consequently it only needs to be able to distinguish - FPU ops which read and write memory from those that don't, and - for those which do, it needs to know the effective address and - data transfer size. This is made easier because the x86 FP - instruction encoding is very regular, basically consisting of - 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode - for a memory FPU insn. So our FPU uinstr carries - the 16 bits in its val1 field. And - FPU_R and FPU_W carry 11 bits in that - field, together with the identity of a TempReg or - (later) RealReg which contains the address. -

    -

  • JIFZ is unique, in that it allows a control-flow - transfer which is not deemed to end a basic block. It causes a - jump to a literal (original) address if the specified argument - is zero. -

    -

  • Finally, INCEIP advances the simulated - %EIP by the specified literal amount. This supports - lazy %EIP updating, as described below. -
- -

-Stages 1 and 2 of the 6-stage translation process mentioned above -deal purely with these uopcodes, and no others. They are -sufficient to express pretty much all the x86 32-bit protected-mode -instruction set, at -least everything understood by a pre-MMX original Pentium (P54C). - -

-Stages 3, 4, 5 and 6 also deal with the following extra -"instrumentation" uopcodes. They are used to express all the -definedness-tracking and -checking machinery which valgrind does. In -later sections we show how to create checking code for each of the -uopcodes above. Note that these instrumentation uopcodes, although -some appearing complicated, have been carefully chosen so that -efficient x86 code can be generated for them. GNU superopt v2.5 did a -great job helping out here. Anyways, the uopcodes are as follows: - -

    -
  • GETV and PUTV are analogues to - GET and PUT above. They are identical - except that they move the V bits for the specified values back and - forth to TempRegs, rather than moving the values - themselves. -

    -

  • Similarly, LOADV and STOREV read and - write V bits from the synthesised shadow memory that Valgrind - maintains. In fact they do more than that, since they also do - address-validity checks, and emit complaints if the read/written - addresses are unaddressable. -

    -

  • TESTV, whose parameters are a TempReg - and a size, tests the V bits in the TempReg, at the - specified operation size (0/1/2/4 byte) and emits an error if any - of them indicate undefinedness. This is the only uopcode capable - of doing such tests. -

    -

  • SETV, whose parameters are also TempReg - and a size, makes the V bits in the TempReg indicate - definedness, at the specified operation size. This is usually - used to generate the correct V bits for a literal value, which is - of course fully defined. -

    -

  • GETVF and PUTVF are analogues to - GETF and PUTF. They move the single V - bit used to model definedness of %EFLAGS between its - home in VG_(baseBlock) and the specified - TempReg. -

    -

  • TAG1 denotes one of a family of unary operations on - TempRegs containing V bits. Similarly, - TAG2 denotes one in a family of binary operations on - V bits. -
- -

-These 10 uopcodes are sufficient to express Valgrind's entire -definedness-checking semantics. In fact most of the interesting magic -is done by the TAG1 and TAG2 -suboperations. - -

-First, however, I need to explain about V-vector operation sizes. -There are 4 sizes. Three of them -- 1, 2 and 4 -- operate on groups of 8, 16 and 32 -V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. -The fourth is the mysterious size 0, which really means a -single V bit. Single V bits are used in various circumstances; in -particular, the definedness of %EFLAGS is modelled with a -single V bit. Now might be a good time to also point out that for -V bits, 1 means "undefined" and 0 means "defined". Similarly, for A -bits, 1 means "invalid address" and 0 means "valid address". This -seems counterintuitive (and so it is), but testing against zero on -x86s saves instructions compared to testing against all 1s, because -many ALU operations set the Z flag for free, so to speak. - -

-With that in mind, the tag ops are: - -

    -
  • (UNARY) Pessimising casts: VgT_PCast40, - VgT_PCast20, VgT_PCast10, - VgT_PCast01, VgT_PCast02 and - VgT_PCast04. A "pessimising cast" takes a V-bit - vector at one size, and creates a new one at another size, - pessimised in the sense that if any of the bits in the source - vector indicate undefinedness, then all the bits in the result - indicate undefinedness. In this case the casts are all to or from - a single V bit, so for example VgT_PCast40 is a - pessimising cast from 32 bits to 1, whereas - VgT_PCast04 simply copies the single source V bit - into all 32 bit positions in the result. Surprisingly, these ops - can all be implemented very efficiently. -

    - There are also the pessimising casts VgT_PCast14, - from 8 bits to 32, VgT_PCast12, from 8 bits to 16, - and VgT_PCast11, from 8 bits to 8. This last one - seems nonsensical, but in fact it isn't a no-op because, as - mentioned above, any undefined (1) bits in the source infect the - entire result. -

    -

  • (UNARY) Propagating undefinedness upwards in a word: - VgT_Left4, VgT_Left2 and - VgT_Left1. These are used to simulate the worst-case - effects of carry propagation in adds and subtracts. They return a - V vector identical to the original, except that if the original - contained any undefined bits, then the lowest undefined bit and all bits above it are - marked as undefined too. Hence the Left bit in the names. -

    -

  • (UNARY) Signed and unsigned value widening: - VgT_SWiden14, VgT_SWiden24, - VgT_SWiden12, VgT_ZWiden14, - VgT_ZWiden24 and VgT_ZWiden12. These - mimic the definedness effects of standard signed and unsigned - integer widening. Unsigned widening creates zero bits in the new - positions, so VgT_ZWiden* accordingly mark - those parts of their argument as defined. Signed widening copies - the sign bit into the new positions, so VgT_SWiden* - copies the definedness of the sign bit into the new positions. - Because 1 means undefined and 0 means defined, these operations - can (fascinatingly) be done by the same operations which they - mimic. Go figure. -

    -

  • (BINARY) Undefined-if-either-Undefined, - Defined-if-either-Defined: VgT_UifU4, - VgT_UifU2, VgT_UifU1, - VgT_UifU0, VgT_DifD4, - VgT_DifD2, VgT_DifD1. These do simple - bitwise operations on pairs of V-bit vectors, with - UifU giving undefined if either arg bit is - undefined, and DifD giving defined if either arg bit - is defined. Abstract interpretation junkies, if any make it this - far, may like to think of them as meets and joins (or is it joins - and meets) in the definedness lattices. -

    -

  • (BINARY; one value, one V bits) Generate argument improvement - terms for AND and OR: VgT_ImproveAND4_TQ, - VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, - VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, - VgT_ImproveOR1_TQ. These help out with AND and OR - operations. AND and OR have the inconvenient property that the - definedness of the result depends on the actual values of the - arguments as well as their definedness. At the bit level: -
    1 AND undefined = undefined, but -
    0 AND undefined = 0, and similarly -
    0 OR undefined = undefined, but -
    1 OR undefined = 1. -
    -

    - It turns out that gcc (quite legitimately) generates code which - relies on this fact, so we have to model it properly in order to - avoid flooding users with spurious value errors. The ultimate - definedness result of AND and OR is calculated using - UifU on the definedness of the arguments, but we - also DifD in some "improvement" terms which - take into account the above phenomena. -

    - ImproveAND takes as its first argument the actual - value of an argument to AND (the T) and as its second the definedness of that - argument (the Q), and returns a V-bit vector which is defined (0) - for bits which have value 0 and are defined; this, when - DifD-ed into the final result, causes those bits to be - defined even if the corresponding bit in the other argument is undefined. -

    - The ImproveOR ops do the dual thing for OR - arguments. Note that XOR does not have this property that one - argument can make the other irrelevant, so there is no need for - such complexity for XOR. -

- -

-That's all the tag ops. If you stare at this long enough, and then -run Valgrind and stare at the pre- and post-instrumented ucode, it -should be fairly obvious how the instrumentation machinery hangs -together. - -

-One point, if you do this: in order to make it easy to differentiate -TempRegs carrying values from TempRegs -carrying V bit vectors, Valgrind prints the former as (for example) -t28 and the latter as q28; the fact that -they carry the same number serves to indicate their relationship. -This is purely for the convenience of the human reader; the register -allocator and code generator don't regard them as different. - - -

Translation into UCode

- -VG_(disBB) allocates a new UCodeBlock and -then uses disInstr to translate x86 instructions one at a -time into UCode, dumping the result in the UCodeBlock. -This goes on until a control-flow transfer instruction is encountered. - -

-Despite the large size of vg_to_ucode.c, this translation -is really very simple. Each x86 instruction is translated entirely -independently of its neighbours, merrily allocating new -TempRegs as it goes. The idea is to have a simple -translator -- in reality, no more than a macro-expander -- and the -resulting bad UCode translation is cleaned up by the UCode -optimisation phase which follows. To give you an idea of some x86 -instructions and their translations (this is a complete basic block, -as Valgrind sees it): -

-        0x40435A50:  incl %edx
-
-           0: GETL      %EDX, t0
-           1: INCL      t0  (-wOSZAP)
-           2: PUTL      t0, %EDX
-
-        0x40435A51:  movsbl (%edx),%eax
-
-           3: GETL      %EDX, t2
-           4: LDB       (t2), t2
-           5: WIDENL_Bs t2
-           6: PUTL      t2, %EAX
-
-        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
-
-           7: GETL      %EAX, t6
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t6,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-
-        0x40435A59:  jnz-8 0x40435A50
-
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

-Notice how the block always ends with an unconditional jump to the -next block. This is a bit unnecessary, but makes many things simpler. - -

-Most x86 instructions turn into sequences of GET, -PUT, LEA1, LEA2, -LOAD and STORE. Some complicated ones -however rely on calling helper bits of code in -vg_helpers.S. The ucode instructions PUSH, -POP, CALL, CALLM_S and -CALLM_E support this. The calling convention is somewhat -ad-hoc and is not the C calling convention. The helper routines must -save all integer registers, and the flags, that they use. Args are -passed on the stack underneath the return address, as usual, and if -result(s) are to be returned, it (they) are either placed in dummy arg -slots created by the ucode PUSH sequence, or just -overwrite the incoming args. - -

-In order that the instrumentation mechanism can handle calls to these -helpers, VG_(saneUCodeBlock) enforces the following -restrictions on calls to helpers: - -

    -
  • Each CALL uinstr must be bracketed by a preceding - CALLM_S marker (dummy uinstr) and a trailing - CALLM_E marker. These markers are used by the - instrumentation mechanism later to establish the boundaries of the - PUSH, POP and CLEAR - sequences for the call. -

    -

  • PUSH, POP and CLEAR - may only appear inside sections bracketed by CALLM_S - and CALLM_E, and nowhere else. -

    -

  • In any such bracketed section, no two PUSH insns may - push the same TempReg. Dually, no two - POPs may pop the same TempReg. -

    -

  • Finally, although this is not checked, args should be removed from - the stack with CLEAR, rather than POPs - into a TempReg which is not subsequently used. This - is because the instrumentation mechanism assumes that all values - POPped from the stack are actually used. -
- -Some of the translations may appear to have redundant -TempReg-to-TempReg moves. This helps the -next phase, UCode optimisation, to generate better code. - - - -

UCode optimisation

- -UCode is then subjected to an improvement pass -(vg_improve()), which blurs the boundaries between the -translations of the original x86 instructions. It's pretty -straightforward. Three transformations are done: - -
    -
  • Redundant GET elimination. Actually, more general - than that -- eliminates redundant fetches of ArchRegs. In our - running example, uinstr 3 GETs %EDX into - t2 despite the fact that, by looking at the previous - uinstr, it is already in t0. The GET is - therefore removed, and t2 renamed to t0. - Assuming t0 is allocated to a host register, it means - the simulated %EDX will exist in a host CPU register - for more than one simulated x86 instruction, which seems to me to - be a highly desirable property. -

    - There is some mucking around to do with subregisters; - %AL vs %AH, %AX vs - %EAX etc. I can't remember how it works, but in - general we are very conservative, and these tend to invalidate the - caching. -

    -

  • Redundant PUT elimination. This annuls - PUTs of values back to simulated CPU registers if a - later PUT would overwrite the earlier - PUT value, and there are no intervening reads of the - simulated register (ArchReg). -

    - As before, we are paranoid when faced with subregister references. - Also, PUTs of %ESP are never annulled, - because it is vital the instrumenter always has an up-to-date - %ESP value available, since %ESP changes - affect addressability of the memory around the simulated stack - pointer. -

    - The implication of the above paragraph is that the simulated - machine's registers are only lazily updated once the above two - optimisation phases have run, with the exception of - %ESP. TempRegs go dead at the end of - every basic block, from which it is inferable that any - TempReg caching a simulated CPU reg is flushed (back - into the relevant VG_(baseBlock) slot) at the end of - every basic block. The further implication is that the simulated - registers are only up-to-date in between basic blocks, and not - at arbitrary points inside basic blocks. And the consequence of - that is that we can only deliver signals to the client in between - basic blocks. None of this seems any problem in practice. -

    -

  • Finally there is a simple def-use thing for condition codes. If - an earlier uinstr writes the condition codes, and the next uinsn - along which actually cares about the condition codes writes the - same or larger set of them, but does not read any, the earlier - uinsn is marked as not writing any condition codes. This saves - a lot of redundant cond-code saving and restoring. -
- -The effect of these transformations on our short block is rather -unexciting, and shown below. On longer basic blocks they can -dramatically improve code quality. - -
-at 3: delete GET, rename t2 to t0 in (4 .. 6)
-at 7: delete GET, rename t6 to t0 in (8 .. 9)
-at 1: annul flag write OSZAP due to later OSZACP
-
-Improved code:
-           0: GETL      %EDX, t0
-           1: INCL      t0
-           2: PUTL      t0, %EDX
-           4: LDB       (t0), t0
-           5: WIDENL_Bs t0
-           6: PUTL      t0, %EAX
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t0,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

UCode instrumentation

- -Once you understand the meaning of the instrumentation uinstrs, -discussed in detail above, the instrumentation scheme is fairly -straightforward. Each uinstr is instrumented in isolation, and the -instrumentation uinstrs are placed before the original uinstr. -Our running example continues below. I have placed a blank line -after every original ucode, to make it easier to see which -instrumentation uinstrs correspond to which originals. - -

-As mentioned somewhere above, TempRegs carrying values -have names like t28, and each one has a shadow carrying -its V bits, with names like q28. This pairing aids in -reading instrumented ucode. - -

-One decision about all this is where to have "observation points", -that is, where to check that V bits are valid. I use a minimalistic -scheme, only checking where a failure of validity could cause the -original program to (seg)fault. So the use of values as memory -addresses causes a check, as do conditional jumps (these cause a check -on the definedness of the condition codes). And arguments -PUSHed for helper calls are checked, hence the weird -restrictions on helper call preambles described above. - -

-Another decision is that once a value is tested, it is thereafter -regarded as defined, so that we do not emit multiple undefined-value -errors for the same undefined value. That means that -TESTV uinstrs are always followed by SETV -on the same (shadow) TempRegs. Most of these -SETVs are redundant and are removed by the -post-instrumentation cleanup phase. - -

-The instrumentation for calling helper functions deserves further -comment. The definedness of results from a helper is modelled using -just one V bit. So, in short, we do pessimising casts of the -definedness of all the args, down to a single bit, and then -UifU these bits together. So this single V bit will say -"undefined" if any part of any arg is undefined. This V bit is then -pessimally cast back up to the result(s) sizes, as needed. If, by -seeing that all the args are got rid of with CLEAR and -none with POP, Valgrind sees that the result of the call -is not actually used, it immediately examines the result V bit with a -TESTV -- SETV pair. If it did not do this, -there would be no observation point to detect that some of the -args to the helper were undefined. Of course, if the helper's results -are indeed used, we don't do this, since the result usage will -presumably cause the result definedness to be checked at some suitable -future point. - -

-In general Valgrind tries to track definedness on a bit-for-bit basis, -but as the above para shows, for calls to helpers we throw in the -towel and approximate down to a single bit. This is because it's too -complex and difficult to track bit-level definedness through complex -ops such as integer multiply and divide, and in any case there are no -reasonable code fragments which attempt to (eg) multiply two -partially-defined values and end up with something meaningful, so -there seems little point in modelling multiplies, divides, etc, in -that level of detail. - -

-Integer loads and stores are instrumented with firstly a test of the -definedness of the address, followed by a LOADV or -STOREV respectively. These turn into calls to -(for example) VG_(helperc_LOADV4). These helpers do two -things: they perform an address-valid check, and they load or store V -bits from/to the relevant address in the (simulated V-bit) memory. - -

-FPU loads and stores are different. As above the definedness of the -address is first tested. However, the helper routine for FPU loads -(VGM_(fpu_read_check)) emits an error if either the -address is invalid or the referenced area contains undefined values. -It has to do this because we do not simulate the FPU at all, and so -cannot track definedness of values loaded into it from memory, so we -have to check them as soon as they are loaded into the FPU, ie, at -this point. We notionally assume that everything in the FPU is -defined. - -

-It follows therefore that FPU writes first check the definedness of -the address, then the validity of the address, and finally mark the -written bytes as well-defined. - -

-If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest -you use the same trick. It works provided that the FPU/MMX unit is -not used merely as a conduit to copy partially undefined data from -one place in memory to another. Unfortunately the integer CPU is used -like that (when copying C structs with holes, for example) and this is -the cause of much of the elaborateness of the instrumentation here -described. - -

-vg_instrument() in vg_translate.c actually -does the instrumentation. There are comments explaining how each -uinstr is handled, so we do not repeat that here. As explained -already, it is bit-accurate, except for calls to helper functions. -Unfortunately the x86 insns bt/bts/btc/btr are done by -helper fns, so bit-level accuracy is lost there. This should be fixed -by doing them inline; it will probably require adding a couple new -uinstrs. Also, left and right rotates through the carry flag (x86 -rcl and rcr) are approximated via a single -V bit; so far this has not caused anyone to complain. The -non-carry rotates, rol and ror, are much -more common and are done exactly. Re-visiting the instrumentation for -AND and OR, they seem rather verbose, and I wonder if it could be done -more concisely now. - -

-The lowercase o on many of the uopcodes in the running -example indicates that the size field is zero, usually meaning a -single-bit operation. - -

-Anyroads, the post-instrumented version of our running example looks -like this: - -

-Instrumented code:
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           7: SETVL     q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          22: SETVL     q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          25: SETVB     q12
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          29: TAG2o     q10 = UifU1 ( q12, q10 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          31: MOVL      q12, q14
-          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-
-          39: GETVFo    q18
-          40: TESTVo    q18
-          41: SETVo     q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

UCode post-instrumentation cleanup

- -

-This pass, coordinated by vg_cleanup(), removes redundant -definedness computation created by the simplistic instrumentation -pass. It consists of two passes, -vg_propagate_definedness() followed by -vg_delete_redundant_SETVs. - -

-vg_propagate_definedness() is a simple -constant-propagation and constant-folding pass. It tries to determine -which TempRegs containing V bits will always indicate -"fully defined", and it propagates this information as far as it can, -and folds out as many operations as possible. For example, the -instrumentation for an ADD of a literal to a variable quantity will be -reduced down so that the definedness of the result is simply the -definedness of the variable quantity, since the literal is by -definition fully defined. - -

-vg_delete_redundant_SETVs removes SETVs on -shadow TempRegs for which the next action is a write. -I don't think there's anything else worth saying about this; it is -simple. Read the sources for details. - -

-So the cleaned-up running example looks like this. As above, I have -inserted line breaks after every original (non-instrumentation) uinstr -to aid readability. As with straightforward ucode optimisation, the -results in this block are undramatic because it is so short; longer -blocks benefit more because they have more redundancy which gets -eliminated. - - -

-at 29: delete UifU1 due to defd arg1
-at 32: change ImproveAND1_TQ to MOV due to defd arg2
-at 41: delete SETV
-at 31: delete MOV
-at 25: delete SETV
-at 22: delete SETV
-at 7: delete SETV
-
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          32: MOVL      t12, q14
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-          39: GETVFo    q18
-          40: TESTVo    q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

Translation from UCode

- -This is all very simple, even though vg_from_ucode.c -is a big file. Position-independent x86 code is generated into -a dynamically allocated array emitted_code; this is -doubled in size when it overflows. Eventually the array is handed -back to the caller of VG_(translate), who must copy -the result into TC and TT, and free the array. - -

-This file is structured into four layers of abstraction, which, -thankfully, are glued back together with extensive -__inline__ directives. From the bottom upwards: - -

    -
  • Address-mode emitters, emit_amode_regmem_reg et al. -

    -

  • Emitters for specific x86 instructions. There are quite a lot of - these, with names such as emit_movv_offregmem_reg. - The v suffix is Intel parlance for a 16/32 bit insn; - there are also b suffixes for 8 bit insns. -

    -

  • The next level up are the synth_* functions, which - synthesise possibly a sequence of raw x86 instructions to do some - simple task. Some of these are quite complex because they have to - work around Intel's silly restrictions on subregister naming. See - synth_nonshiftop_reg_reg for example. -

    -

  • Finally, at the top of the heap, we have - emitUInstr(), - which emits code for a single uinstr. -
- -

-Some comments: -

    -
  • The hack for FPU instructions becomes apparent here. To do a - FPU ucode instruction, we load the simulated FPU's - state from its VG_(baseBlock) into the real FPU - using an x86 frstor insn, do the ucode - FPU insn on the real CPU, and write the updated FPU - state back into VG_(baseBlock) using an - fnsave instruction. This is pretty brutal, but is - simple and it works, and even seems tolerably efficient. There is - no attempt to cache the simulated FPU state in the real FPU over - multiple back-to-back ucode FPU instructions. -

    - FPU_R and FPU_W are also done this way, - with the minor complication that we need to patch in some - addressing mode bits so the resulting insn knows the effective - address to use. This is easy because of the regularity of the x86 - FPU instruction encodings. -

    -

  • An analogous trick is done with ucode insns which claim, in their - flags_r and flags_w fields, that they - read or write the simulated %EFLAGS. For such cases - we first copy the simulated %EFLAGS into the real - %eflags, then do the insn, then, if the insn says it - writes the flags, copy back to %EFLAGS. This is a - bit expensive, which is why the ucode optimisation pass goes to - some effort to remove redundant flag-update annotations. -
- -

-And so ... that's the end of the documentation for the instrumenting -translator! It's really not that complex, because it's composed as a -sequence of simple(ish) self-contained transformations on -straight-line blocks of code. - - -

Top-level dispatch loop

- -Urk. In VG_(toploop). This is basically boring and -unsurprising, not to mention fiddly and fragile. It needs to be -cleaned up. - -

-Perhaps the only surprise is that the whole thing is run -on top of a setjmp-installed exception handler, because, -supposing a translation got a segfault, we have to bail out of the -Valgrind-supplied exception handler VG_(oursignalhandler) -and immediately start running the client's segfault handler, if it has -one. In particular we can't finish the current basic block and then -deliver the signal at some convenient future point, because signals -like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not -simply be re-tried. (I'm sure there is a clearer way to explain this). - - -

Exceptions, creating new translations

-

Self-modifying code

- -

Lazy updates of the simulated program counter

- -Simulated %EIP is not updated after every simulated x86 -insn as this was regarded as too expensive. Instead ucode -INCEIP insns move it along as and when necessary. -Currently we don't allow it to fall more than 4 bytes behind reality -(see VG_(disBB) for the way this works). -

-Note that %EIP is always brought up to date by the inner -dispatch loop in VG_(dispatch), so that if the client -takes a fault we know at least which basic block this happened in. - - -

The translation cache and translation table

- -

Signals

- -Horrible, horrible. vg_signals.c. -Basically, since we have to intercept all system -calls anyway, we can see when the client tries to install a signal -handler. If it does so, we make a note of what the client asked to -happen, and ask the kernel to route the signal to our own signal -handler, VG_(oursignalhandler). This simply notes the -delivery of signals, and returns. - -

-Every 1000 basic blocks, we see if more signals have arrived. If so, -VG_(deliver_signals) builds signal delivery frames on the -client's stack, and allows their handlers to be run. Valgrind places -in these signal delivery frames a bogus return address, -VG_(signalreturn_bogusRA), and checks all jumps to see -if any jump to it. If so, this is a sign that a signal handler is -returning, and if so Valgrind removes the relevant signal frame from -the client's stack, restores from the signal frame the simulated -state before the signal was delivered, and allows the client to run -onwards. We have to do it this way because some signal handlers never -return, they just longjmp(), which nukes the signal -delivery frame. - -

-The Linux kernel has a different but equally horrible hack for -detecting signal handler returns. Discovering it is left as an -exercise for the reader. - - - -

Errors, error contexts, error reporting, suppressions

-

Client malloc/free

-

Low-level memory management

-

A and V bitmaps

-

Symbol table management

-

Dealing with system calls

-

Namespace management

-

GDB attaching

-

Non-dependence on glibc or anything else

-

The leak detector

-

Performance problems

-

Continuous sanity checking

-

Tracing, or not tracing, child processes

-

Assembly glue for syscalls

- - -
- -

Extensions

- -Some comments about Stuff To Do. - -

Bugs

- -Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS -(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has -it looping at startup. I can't repro either behaviour. Needs -repro-ing and fixing. - - -

Threads

- -Doing a good job of thread support strikes me as almost a -research-level problem. The central issues are how to do fast cheap -locking of the VG_(primary_map) structure, whether or not -accesses to the individual secondary maps need locking, what -race-condition issues result, and whether the already-nasty mess that -is the signal simulator needs further hackery. - -

-I realise that threads are the most-frequently-requested feature, and -I am thinking about it all. If you have guru-level understanding of -fast mutual exclusion mechanisms and race conditions, I would be -interested in hearing from you. - - -

Verification suite

- -Directory tests/ contains various ad-hoc tests for -Valgrind. However, there is no systematic verification or regression -suite, that, for example, exercises all the stuff in -vg_memory.c, to ensure that illegal memory accesses and -undefined value uses are detected as they should be. It would be good -to have such a suite. - - -

Porting to other platforms

- -It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, -and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style -executables, not ELF ?) - -

-The main difficulties, for an x86-ELF platform, seem to be: - -

    -
  • You'd need to rewrite the /proc/self/maps parser - (vg_procselfmaps.c). - Easy. -

    -

  • You'd need to rewrite vg_syscall_mem.c, or, more - specifically, provide one for your OS. This is tedious, but you - can implement syscalls on demand, and the Linux kernel interface - is, for the most part, going to look very similar to the *BSD - interfaces, so it's really a copy-paste-and-modify-on-demand job. - As part of this, you'd need to supply a new - vg_kerneliface.h file. -

    -

  • You'd also need to change the syscall wrappers for Valgrind's - internal use, in vg_mylibc.c. -
- -All in all, I think a port to x86-ELF *BSDs is not really very -difficult, and in some ways I would like to see it happen, because -that would force a more clear factoring of Valgrind into platform -dependent and independent pieces. Not to mention, *BSD folks also -deserve to use Valgrind just as much as the Linux crew do. - - -

-


- -

Easy stuff which ought to be done

- -

MMX instructions

- -MMX insns should be supported, using the same trick as for FPU insns. -If the MMX registers are not used to copy uninitialised junk from one -place to another in memory, this means we don't have to actually -simulate the internal MMX unit state, so the FPU hack applies. This -should be fairly easy. - - - -

Fix stabs-info reader

- -The machinery in vg_symtab2.c which reads "stabs" style -debugging info is pretty weak. It usually correctly translates -simulated program counter values into line numbers and procedure -names, but the file name is often completely wrong. I think the -logic used to parse "stabs" entries is weak. It should be fixed. -The simplest solution, IMO, is to copy either the logic or simply the -code out of GNU binutils which does this; since GDB can clearly get it -right, binutils (or GDB?) must have code to do this somewhere. - - - - - -

BT/BTC/BTS/BTR

- -These are x86 instructions which test, complement, set, or reset, a -single bit in a word. At the moment they are both incorrectly -implemented and incorrectly instrumented. - -

-The incorrect instrumentation is due to use of helper functions. This -means we lose bit-level definedness tracking, which could wind up -giving spurious uninitialised-value use errors. The Right Thing to do -is to invent a couple of new UOpcodes, I think GET_BIT -and SET_BIT, which can be used to implement all 4 x86 -insns, get rid of the helpers, and give bit-accurate instrumentation -rules for the two new UOpcodes. - -

-I realised the other day that they are mis-implemented too. The x86 -insns take a bit-index and a register or memory location to access. -For registers the bit index clearly can only be in the range zero to -register-width minus 1, and I assumed the same applied to memory -locations too. But evidently not; for memory locations the index can -be arbitrary, and the processor will index arbitrarily into memory as -a result. This too should be fixed. Sigh. Presumably indexing -outside the immediate word is not actually used by any programs yet -tested on Valgrind, for otherwise they (presumably) would simply not -work at all. If you plan to hack on this, first check the Intel docs -to make sure my understanding is really correct. - - - -

Using PREFETCH instructions

- -Here's a small but potentially interesting project for performance -junkies. Experiments with valgrind's code generator and optimiser(s) -suggest that reducing the number of instructions executed in the -translations and mem-check helpers gives disappointingly small -performance improvements. Perhaps this is because performance of -Valgrindified code is limited by cache misses. After all, each read -in the original program now gives rise to at least three reads, one -for the VG_(primary_map), one of the resulting -secondary, and the original. Not to mention, the instrumented -translations are 13 to 14 times larger than the originals. All in all -one would expect the memory system to be hammered to hell and then -some. - -

-So here's an idea. An x86 insn involving a read from memory, after -instrumentation, will turn into ucode of the following form: -

-    ... calculate effective addr, into ta and qa ...
-    TESTVL qa             -- is the addr defined?
-    LOADV (ta), qloaded   -- fetch V bits for the addr
-    LOAD  (ta), tloaded   -- do the original load
-
-At the point where the LOADV is done, we know the actual -address (ta) from which the real LOAD will -be done. We also know that the LOADV will take around -20 x86 insns to do. So it seems plausible that doing a prefetch of -ta just before the LOADV might just avoid a -miss at the LOAD point, and that might be a significant -performance win. - -

-Prefetch insns are notoriously temperamental, more often than not -making things worse rather than better, so this would require -considerable fiddling around. It's complicated because Intels and -AMDs have different prefetch insns with different semantics, so that -too needs to be taken into account. As a general rule, even placing -the prefetches before the LOADV insn is too near the -LOAD; the ideal distance is apparently circa 200 CPU -cycles. So it might be worth having another analysis/transformation -pass which pushes prefetches as far back as possible, hopefully -immediately after the effective address becomes available. - -

-Doing too many prefetches is also bad because they soak up bus -bandwidth / cpu resources, so some cleverness in deciding which loads -to prefetch and which to not might be helpful. One can imagine not -prefetching client-stack-relative (%EBP or -%ESP) accesses, since the stack in general tends to show -good locality anyway. - -

-There's quite a lot of experimentation to do here, but I think it -might make an interesting week's work for someone. - -

-As of 15-ish March 2002, I've started to experiment with this, using -the AMD prefetch/prefetchw insns. - - - -

User-defined permission ranges

- -This is quite a large project -- perhaps a month's hacking for a -capable hacker to do a good job -- but it's potentially very -interesting. The outcome would be that Valgrind could detect a -whole class of bugs which it currently cannot. - -

-The presentation falls into two pieces. - -

-Part 1: user-defined address-range permission setting -

- -Valgrind intercepts the client's malloc, -free, etc calls, watches system calls, and watches the -stack pointer move. This is currently the only way it knows about -which addresses are valid and which not. Sometimes the client program -knows extra information about its memory areas. For example, the -client could at some point know that all elements of an array are -out-of-date. We would like to be able to convey to Valgrind this -information that the array is now addressable-but-uninitialised, so -that Valgrind can then warn if elements are used before they get new -values. - -

-What I would like are some macros like this: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-   VALGRIND_MAKE_WRITABLE(addr, len)
-   VALGRIND_MAKE_READABLE(addr, len)
-
-and also, to check that memory is addressable/initialised, -
-   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
-   VALGRIND_CHECK_INITIALISED(addr, len)
-
- -

-I then include in my sources a header defining these macros, rebuild -my app, run under Valgrind, and get user-defined checks. - -

-Now here's a neat trick. It's a nuisance to have to re-link the app -with some new library which implements the above macros. So the idea -is to define the macros so that the resulting executable is still -completely stand-alone, and can be run without Valgrind, in which case -the macros do nothing, but when run on Valgrind, the Right Thing -happens. How to do this? The idea is for these macros to turn into a -piece of inline assembly code, which (1) has no effect when run on the -real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane -person would ever write, which is important for avoiding false matches -in (2). So here's a suggestion: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-
-becomes (roughly speaking) -
-   movl addr, %eax
-   movl len,  %ebx
-   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
-                     -- 2, etc
-   rorl $13, %ecx
-   rorl $19, %ecx
-   rorl $11, %eax
-   rorl $21, %eax
-
-The rotate sequences have no effect, and it's unlikely they would -appear for any other reason, but they define a unique byte-sequence -which the JITter can easily spot. Using the operand constraints -section at the end of a gcc inline-assembly statement, we can tell gcc -that the assembly fragment kills %eax, %ebx, -%ecx and the condition codes, so this fragment is made -harmless when not running on Valgrind, runs quickly when not on -Valgrind, and does not require any other library support. - - -

-Part 2: using it to detect interference between stack variables -

- -Currently Valgrind cannot detect errors of the following form: -

-void fooble ( void )
-{
-   int a[10];
-   int b[10];
-   a[10] = 99;
-}
-
-Now imagine rewriting this as -
-void fooble ( void )
-{
-   int spacer0;
-   int a[10];
-   int spacer1;
-   int b[10];
-   int spacer2;
-   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
-   a[10] = 99;
-}
-
-Now the invalid write is certain to hit spacer0 or -spacer1, so Valgrind will spot the error. - -

-There are two complications. - -

-The first is that we don't want to annotate sources by hand, so the -Right Thing to do is to write a C/C++ parser, annotator, prettyprinter -which does this automatically, and run it on post-CPP'd C/C++ source. -See http://www.cacheprof.org for an example of a system which -transparently inserts another phase into the gcc/g++ compilation -route. The parser/prettyprinter is probably not as hard as it sounds; -I would write it in Haskell, a powerful functional language well -suited to doing symbolic computation, with which I am intimately -familiar. There is already a C parser written in Haskell by someone in -the Haskell community, and that would probably be a good starting -point. - -

-The second complication is how to get rid of these -NOACCESS records inside Valgrind when the instrumented -function exits; after all, these refer to stack addresses and will -make no sense whatever when some other function happens to re-use the -same stack address range, probably shortly afterwards. I think I -would be inclined to define a special stack-specific macro -

-   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
-
-which causes Valgrind to record the client's %ESP at the -time it is executed. Valgrind will then watch for changes in -%ESP and discard such records as soon as the protected -area is uncovered by an increase in %ESP. I hesitate -with this scheme only because it is potentially expensive, if there -are hundreds of such records, and considering that changes in -%ESP already require expensive messing with stack access -permissions. - -

-This is probably easier and more robust than for the instrumenter -program to try and spot all exit points for the procedure and place -suitable deallocation annotations there. Plus C++ procedures can -bomb out at any point if they get an exception, so spotting return -points at the source level just won't work at all. - -

-Although some work, it's all eminently doable, and it would make -Valgrind into an even-more-useful tool. - - -

- - -


- -

Cache profiling

-Valgrind is a very nice platform for doing cache profiling and other kinds of -simulation, because it converts horrible x86 instructions into nice clean -RISC-like UCode. For example, for cache profiling we are interested in -instructions that read and write memory; in UCode there are only four -instructions that do this: LOAD, STORE, -FPU_R and FPU_W. By contrast, because of the x86 -addressing modes, almost every instruction can read or write memory.

- -Most of the cache profiling machinery is in the file -vg_cachesim.c.

- -These notes are a somewhat haphazard guide to how Valgrind's cache profiling -works.

- -

Cost centres

-Valgrind gathers cache profiling about every instruction executed, -individually. Each instruction has a cost centre associated with it. -There are two kinds of cost centre: one for instructions that don't reference -memory (iCC), and one for instructions that do -(idCC): - -
-typedef struct _CC {
-   ULong a;
-   ULong m1;
-   ULong m2;
-} CC;
-
-typedef struct _iCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-} iCC;
-   
-typedef struct _idCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   UChar data_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I; 
-   CC D; 
-} idCC; 
-
- -Each CC has three fields a, m1, -m2 for recording references, level 1 misses and level 2 misses. -Each of these is a 64-bit ULong -- the numbers can get very large, -ie. greater than 4.2 billion allowed by a 32-bit unsigned int.

- -An iCC has one CC for instruction cache accesses. An -idCC has two, one for instruction cache accesses, and one for data -cache accesses.

- -The iCC and idCC structs also store unchanging -information about the instruction: -

    -
  • An instruction-type identification tag (explained below)
  • -

  • Instruction size
  • -

  • Data reference size (idCC only)
  • -

  • Instruction address
  • -

- -Note that data address is not one of the fields for idCC. This is -because for many memory-referencing instructions the data address can change -each time it's executed (eg. if it uses register-offset addressing). We have -to give this item to the cache simulation in a different way (see -Instrumentation section below). Some memory-referencing instructions do always -reference the same address, but we don't try to treat them specially in order to -keep things simple.

- -Also note that there is only room for recording info about one data cache -access in an idCC. So what about instructions that do a read then -a write, such as: - -

inc (%esi)
- -In a write-allocate cache, as simulated by Valgrind, the write cannot miss, -since it immediately follows the read which will drag the block into the cache -if it's not already there. So the write access isn't really interesting, and -Valgrind doesn't record it. This means that Valgrind doesn't measure -memory references, but rather memory references that could miss in the cache. -This behaviour is the same as that used by the AMD Athlon hardware counters. -It also has the benefit of simplifying the implementation -- instructions that -read and write memory can be treated like instructions that read memory.

- -

Storing cost-centres

-Cost centres are stored in a way that makes them very cheap to look up, which is -important since one is looked up for every original x86 instruction -executed.

- -Valgrind does JIT translations at the basic block level, and cost centres are -also set up and stored at the basic block level. By doing things carefully, we -store all the cost centres for a basic block in a contiguous array, and lookup -comes almost for free.

- -Consider this part of a basic block (for exposition purposes, pretend it's an -entire basic block): - -

-movl $0x0,%eax
-movl $0x99, -4(%ebp)
-
- -The translation to UCode looks like this: - -
-MOVL      $0x0, t20
-PUTL      t20, %EAX
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-STL       t18, (t14)
-INCEIPo   $7
-
- -The first step is to allocate the cost centres. This requires a preliminary -pass to count how many x86 instructions were in the basic block, and their -types (and thus sizes). UCode translations for single x86 instructions are -delimited by the INCEIPo instruction, the argument of which gives -the byte size of the instruction (note that lazy INCEIP updating is turned off -to allow this).

- -We can tell if an x86 instruction references memory by looking for -LDL and STL UCode instructions, and thus what kind of -cost centre is required. From this we can determine how many cost centres we -need for the basic block, and their sizes. We can then allocate them in a -single array.

- -Consider the example code above. After the preliminary pass, we know we need -two cost centres, one iCC and one idCC. So we -allocate an array to store these which looks like this: - -

-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-
-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-|(uninit)|      D.a         (8 bytes)
-|(uninit)|      D.m1        (8 bytes)
-|(uninit)|      D.m2        (8 bytes)
-
- -(We can see now why we need tags to distinguish between the two types of cost -centres.)

- -We also record the size of the array. We look up the debug info of the first -instruction in the basic block, and then stick the array into a table indexed -by filename and function name. This makes it easy to dump the information -quickly to file at the end.

- -

Instrumentation

-The instrumentation pass has two main jobs: - -
    -
  1. Fill in the gaps in the allocated cost centres.
  2. -

  3. Add UCode to call the cache simulator for each instruction.
  4. -

- -The instrumentation pass steps through the UCode and the cost centres in -tandem. As each original x86 instruction's UCode is processed, the appropriate -gaps in the instructions cost centre are filled in, for example: - -
-|INSTR_CC|      tag         (1 byte)
-|5       |      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|i_addr1 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-
-|WRITE_CC|      tag         (1 byte)
-|7       |      instr_size  (1 byte)
-|4       |      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|i_addr2 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-|0       |      D.a         (8 bytes)
-|0       |      D.m1        (8 bytes)
-|0       |      D.m2        (8 bytes)
-
- -(Note that this step is not performed if a basic block is re-translated; see -here for more information.)

- -GCC inserts padding before the instr_addr field so that it is word -aligned.

- -The instrumentation added to call the cache simulation function looks like this -(instrumentation is indented to distinguish it from the original UCode): - -

-MOVL      $0x0, t20
-PUTL      t20, %EAX
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  MOVL      $0x4091F8A4, t46  # address of 1st CC
-  PUSHL     t46
-  CALLMo    $0x12             # first cachesim function
-  CLEARo    $0x4
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-  MOVL      t14, t42
-STL       t18, (t14)
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  PUSHL     t42
-  MOVL      $0x4091F8C4, t44  # address of 2nd CC
-  PUSHL     t44
-  CALLMo    $0x13             # second cachesim function
-  CLEARo    $0x8
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $7
-
- -Consider the first instruction's UCode. Each call is surrounded by three -PUSHL and POPL instructions to save and restore the -caller-save registers. Then the address of the instruction's cost centre is -pushed onto the stack, to be the first argument to the cache simulation -function. The address is known at this point because we are doing a -simultaneous pass through the cost centre array. This means the cost centre -lookup for each instruction is almost free (just the cost of pushing an -argument for a function call). Then the call to the cache simulation function -for non-memory-reference instructions is made (note that the -CALLMo UInstruction takes an offset into a table of predefined -functions; it is not an absolute address), and the single argument is -CLEARed from the stack.

- -The second instruction's UCode is similar. The only difference is that, as -mentioned before, we have to pass the address of the data item referenced to -the cache simulation function too. This explains the MOVL t14, -t42 and PUSHL t42 UInstructions. (Note that the seemingly -redundant MOVing will probably be optimised away during register -allocation.)

- -Note that instead of storing unchanging information about each instruction -(instruction size, data size, etc) in its cost centre, we could have passed in -these arguments to the simulation function. But this would slow the calls down -(two or three extra arguments pushed onto the stack). Also it would bloat the -UCode instrumentation by amounts similar to the space required for them in the -cost centre; bloated UCode would also fill the translation cache more quickly, -requiring more translations for large programs and slowing them down more.

- - -

Handling basic block retranslations

-The above description ignores one complication. Valgrind has a limited size -cache for basic block translations; if it fills up, old translations are -discarded. If a discarded basic block is executed again, it must be -re-translated.

- -However, we can't use this approach for profiling -- we can't throw away cost -centres for instructions in the middle of execution! So when a basic block is -translated, we first look for its cost centre array in the hash table. If -there is no cost centre array, it must be the first translation, so we proceed -as described above. But if there is a cost centre array already, it must be a -retranslation. In this case, we skip the cost centre allocation and -initialisation steps, but still do the UCode instrumentation step.

- -

The cache simulation

-The cache simulation is fairly straightforward. It just tracks which memory -blocks are in the cache at the moment (it doesn't track the contents, since -that is irrelevant).

- -The interface to the simulation is quite clean. The functions called from the -UCode contain calls to the simulation functions in the files -vg_cachesim_{I1,D1,L2}.c; these calls are inlined so that only -one function call is done per simulated x86 instruction. The file -vg_cachesim.c simply #includes the three files -containing the simulation, which makes plugging in new cache simulations -very easy -- you just replace the three files and recompile.

- -

Output

-Output is fairly straightforward, basically printing the cost centre for every -instruction, grouped by files and functions. Total counts (eg. total cache -accesses, total L1 misses) are calculated when traversing this structure rather -than during execution, to save time; the cache simulation functions are called -so often that even one or two extra adds can make a sizeable difference.

- -Input file has the following format: - -

-file         ::= desc_line* cmd_line events_line data_line+ summary_line
-desc_line    ::= "desc:" ws? non_nl_string
-cmd_line     ::= "cmd:" ws? cmd
-events_line  ::= "events:" ws? (event ws)+
-data_line    ::= file_line | fn_line | count_line
-file_line    ::= ("fl=" | "fi=" | "fe=") filename
-fn_line      ::= "fn=" fn_name
-count_line   ::= line_num ws? (count ws)+
-summary_line ::= "summary:" ws? (count ws)+
-count        ::= num | "."
-
- -Where: - -
    -
  • non_nl_string is any string not containing a newline.
  • -

  • cmd is a command line invocation.
  • -

  • filename and fn_name can be anything.
  • -

  • num and line_num are decimal numbers.
  • -

  • ws is whitespace.
  • -

  • nl is a newline.
  • -

- -The contents of the "desc:" lines is printed out at the top of the summary. -This is a generic way of providing simulation specific information, eg. for -giving the cache configuration for cache simulation.

- -Counts can be "." to represent "N/A", eg. the number of write misses for an -instruction that doesn't write to memory.

- -The number of counts in each line and the -summary_line should not exceed the number of events in the -events_line. If the number in each line is less, -vg_annotate treats those missing as though they were a "." entry.

- -A file_line changes the current file name. A fn_line -changes the current function name. A count_line contains counts -that pertain to the current filename/fn_name. A "fl=" file_line -and a fn_line must appear before any count_lines to -give the context of the first count_lines.

- -Each file_line should be immediately followed by a -fn_line. "fi=" file_lines are used to switch -filenames for inlined functions; "fe=" file_lines are similar, but -are put at the end of a basic block in which the file name hasn't been switched -back to the original file name. (fi and fe lines behave the same, they are -only distinguished to help debugging.)

- - -

Summary of performance features

-Quite a lot of work has gone into making the profiling as fast as possible. -This is a summary of the important features: - -
    -
  • The basic block-level cost centre storage allows almost free cost centre - lookup.
  • - -

  • Only one function call is made per instruction simulated; even this - accounts for a sizeable percentage of execution time, but it seems - unavoidable if we want flexibility in the cache simulator.
  • - -

  • Unchanging information about an instruction is stored in its cost centre, - avoiding unnecessary argument pushing, and minimising UCode - instrumentation bloat.
  • - -

  • Summary counts are calculated at the end, rather than during - execution.
  • - -

  • The cachegrind.out output files can contain huge amounts of - information; file format was carefully chosen to minimise file - sizes.
  • -

- - -

Annotation

-Annotation is done by vg_annotate. It is a fairly straightforward Perl script -that slurps up all the cost centres, and then runs through all the chosen -source files, printing out cost centres with them. It too has been carefully -optimised. - - -

Similar work, extensions

-It would be relatively straightforward to do other simulations and obtain -line-by-line information about interesting events. A good example would be -branch prediction -- all branches could be instrumented to interact with a -branch prediction simulator, using very similar techniques to those described -above.

- -In particular, vg_annotate would not need to change -- the file format is such -that it is not specific to the cache simulation, but could be used for any kind -of line-by-line information. The only part of vg_annotate that is specific to -the cache simulation is the name of the input file -(cachegrind.out), although it would be very simple to add an -option to control this.

- - - diff --git a/coregrind/dosyms b/coregrind/dosyms deleted file mode 100755 index 4a46f01e6b..0000000000 --- a/coregrind/dosyms +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/sh - -# A simple script to help me ensure that my libpthread.so looks -# from the outside, to the linker, identical to the original. - -nm /lib/libpthread.so.0 | grep " T " | cut -c 10- > orig-T -nm /lib/libpthread.so.0 | grep " D " | cut -c 10- > orig-D -nm /lib/libpthread.so.0 | grep " W " | cut -c 10- > orig-W - -nm ./libpthread.so | grep " T " | cut -c 10- > mine-T -nm ./libpthread.so | grep " D " | cut -c 10- > mine-D -nm ./libpthread.so | grep " W " | cut -c 10- > mine-W - -echo ========================== TEXT orig vs mine ========================= -sdiff -w 80 orig-T mine-T -echo - -echo ========================== WEAK orig vs mine ========================= -sdiff -w 80 orig-W mine-W -echo - -echo ========================== DATA orig vs mine ========================= -sdiff -w 80 orig-D mine-D -echo diff --git a/coregrind/valgrind.in b/coregrind/valgrind.in deleted file mode 100755 index 7b99277254..0000000000 --- a/coregrind/valgrind.in +++ /dev/null @@ -1,194 +0,0 @@ -#!/bin/sh - -# Should point to the installation directory -prefix="@prefix@" -exec_prefix="@exec_prefix@" -VALGRIND="@libdir@/valgrind" - - -# Other stuff ... -version="@VERSION@" -emailto="jseward@acm.org" - -# The default name of the suppressions file -vgsupp="--suppressions=$VALGRIND/default.supp" - -# name we were invoked with -vgname=`echo $0 | sed 's,^.*/,,'` - -# Valgrind options -vgopts= - -# Prog and arg to run -argopts= - -# Show usage info? -dousage=0 - -# show version info? 
-doversion=0 - -# Collect up args for Valgrind -while [ $+ != 0 ] -do - arg=$1 - case "$arg" in -# options for the user - --help) dousage=1; break;; - --version) doversion=1; break;; - --logfile-fd=*) vgopts="$vgopts $arg"; shift;; - -v) vgopts="$vgopts $arg"; shift;; - --verbose) vgopts="$vgopts -v"; shift;; - -q) vgopts="$vgopts $arg"; shift;; - --quiet) vgopts="$vgopts $arg"; shift;; - --error-limit=no) vgopts="$vgopts $arg"; shift;; - --error-limit=yes) vgopts="$vgopts $arg"; shift;; - --check-addrVs=no) vgopts="$vgopts $arg"; shift;; - --check-addrVs=yes) vgopts="$vgopts $arg"; shift;; - --gdb-attach=no) vgopts="$vgopts $arg"; shift;; - --gdb-attach=yes) vgopts="$vgopts $arg"; shift;; - --demangle=no) vgopts="$vgopts $arg"; shift;; - --demangle=yes) vgopts="$vgopts $arg"; shift;; - --num-callers=*) vgopts="$vgopts $arg"; shift;; - --partial-loads-ok=no) vgopts="$vgopts $arg"; shift;; - --partial-loads-ok=yes) vgopts="$vgopts $arg"; shift;; - --leak-check=no) vgopts="$vgopts $arg"; shift;; - --leak-check=yes) vgopts="$vgopts $arg"; shift;; - --show-reachable=no) vgopts="$vgopts $arg"; shift;; - --show-reachable=yes) vgopts="$vgopts $arg"; shift;; - --leak-resolution=low) vgopts="$vgopts $arg"; shift;; - --leak-resolution=med) vgopts="$vgopts $arg"; shift;; - --leak-resolution=high) vgopts="$vgopts $arg"; shift;; - --sloppy-malloc=no) vgopts="$vgopts $arg"; shift;; - --sloppy-malloc=yes) vgopts="$vgopts $arg"; shift;; - --alignment=*) vgopts="$vgopts $arg"; shift;; - --trace-children=no) vgopts="$vgopts $arg"; shift;; - --trace-children=yes) vgopts="$vgopts $arg"; shift;; - --workaround-gcc296-bugs=no) vgopts="$vgopts $arg"; shift;; - --workaround-gcc296-bugs=yes) vgopts="$vgopts $arg"; shift;; - --freelist-vol=*) vgopts="$vgopts $arg"; shift;; - --suppressions=*) vgopts="$vgopts $arg"; shift;; - --cachesim=yes) vgopts="$vgopts $arg"; shift;; - --cachesim=no) vgopts="$vgopts $arg"; shift;; - --I1=*,*,*) vgopts="$vgopts $arg"; shift;; - --D1=*,*,*) 
vgopts="$vgopts $arg"; shift;; - --L2=*,*,*) vgopts="$vgopts $arg"; shift;; - --weird-hacks=*) vgopts="$vgopts $arg"; shift;; -# options for debugging Valgrind - --sanity-level=*) vgopts="$vgopts $arg"; shift;; - --single-step=yes) vgopts="$vgopts $arg"; shift;; - --single-step=no) vgopts="$vgopts $arg"; shift;; - --optimise=yes) vgopts="$vgopts $arg"; shift;; - --optimise=no) vgopts="$vgopts $arg"; shift;; - --instrument=yes) vgopts="$vgopts $arg"; shift;; - --instrument=no) vgopts="$vgopts $arg"; shift;; - --cleanup=yes) vgopts="$vgopts $arg"; shift;; - --cleanup=no) vgopts="$vgopts $arg"; shift;; - --smc-check=none) vgopts="$vgopts $arg"; shift;; - --smc-check=some) vgopts="$vgopts $arg"; shift;; - --smc-check=all) vgopts="$vgopts $arg"; shift;; - --trace-syscalls=yes) vgopts="$vgopts $arg"; shift;; - --trace-syscalls=no) vgopts="$vgopts $arg"; shift;; - --trace-signals=yes) vgopts="$vgopts $arg"; shift;; - --trace-signals=no) vgopts="$vgopts $arg"; shift;; - --trace-symtab=yes) vgopts="$vgopts $arg"; shift;; - --trace-symtab=no) vgopts="$vgopts $arg"; shift;; - --trace-malloc=yes) vgopts="$vgopts $arg"; shift;; - --trace-malloc=no) vgopts="$vgopts $arg"; shift;; - --trace-sched=yes) vgopts="$vgopts $arg"; shift;; - --trace-sched=no) vgopts="$vgopts $arg"; shift;; - --trace-pthread=none) vgopts="$vgopts $arg"; shift;; - --trace-pthread=some) vgopts="$vgopts $arg"; shift;; - --trace-pthread=all) vgopts="$vgopts $arg"; shift;; - --stop-after=*) vgopts="$vgopts $arg"; shift;; - --dump-error=*) vgopts="$vgopts $arg"; shift;; - -*) dousage=1; break;; - *) break;; - esac -done - -if [ z"$doversion" = z1 ]; then - echo "valgrind-$version" - exit 1 -fi - -if [ $# = 0 ] || [ z"$dousage" = z1 ]; then - echo - echo "usage: $vgname [options] prog-and-args" - echo - echo " options for the user, with defaults in [ ], are:" - echo " --help show this message" - echo " --version show version" - echo " -q --quiet run silently; only print error msgs" - echo " -v --verbose be more 
verbose, incl counts of errors" - echo " --gdb-attach=no|yes start GDB when errors detected? [no]" - echo " --demangle=no|yes automatically demangle C++ names? [yes]" - echo " --num-callers= show callers in stack traces [4]" - echo " --error-limit=no|yes stop showing new errors if too many? [yes]" - echo " --partial-loads-ok=no|yes too hard to explain here; see manual [yes]" - echo " --leak-check=no|yes search for memory leaks at exit? [no]" - echo " --leak-resolution=low|med|high" - echo " amount of bt merging in leak check [low]" - echo " --show-reachable=no|yes show reachable blocks in leak check? [no]" - echo " --sloppy-malloc=no|yes round malloc sizes to next word? [no]" - echo " --alignment= set minimum alignment of allocations [4]" - echo " --trace-children=no|yes Valgrind-ise child processes? [no]" - echo " --logfile-fd= file descriptor for messages [2=stderr]" - echo " --freelist-vol= volume of freed blocks queue [1000000]" - echo " --workaround-gcc296-bugs=no|yes self explanatory [no]" - echo " --suppressions= suppress errors described in" - echo " suppressions file " - echo " --check-addrVs=no|yes experimental lighterweight checking? [yes]" - echo " yes == Valgrind's original behaviour" - echo " --cachesim=no|yes do cache profiling? [no]" - echo " --I1=,, set I1 cache manually" - echo " --D1=,, set D1 cache manually" - echo " --L2=,, set L2 cache manually" - echo " --weird-hacks=hack1,hack2,... [no hacks selected]" - echo " recognised hacks are: ioctl-VTIME truncate-writes" - echo "" - echo - echo " options for debugging Valgrind itself are:" - echo " --sanity-level= level of sanity checking to do [1]" - echo " --single-step=no|yes translate each instr separately? [no]" - echo " --optimise=no|yes improve intermediate code? [yes]" - echo " --instrument=no|yes actually do memory checks? [yes]" - echo " --cleanup=no|yes improve after instrumentation? [yes]" - echo " --smc-check=none|some|all check writes for s-m-c? 
[some]" - echo " --trace-syscalls=no|yes show all system calls? [no]" - echo " --trace-signals=no|yes show signal handling details? [no]" - echo " --trace-symtab=no|yes show symbol table details? [no]" - echo " --trace-malloc=no|yes show client malloc details? [no]" - echo " --trace-sched=no|yes show thread scheduler details? [no]" - echo " --trace-pthread=none|some|all show pthread event details? [no]" - echo " --stop-after= switch to real CPU after executing" - echo " basic blocks [infinity]" - echo " --dump-error= show translation for basic block" - echo " associated with 'th" - echo " error context [0=don't show any]" - echo - echo " Extra options are read from env variable \$VALGRIND_OPTS" - echo - echo " Valgrind is Copyright (C) 2000-2002 Julian Seward" - echo " and licensed under the GNU General Public License, version 2." - echo " Bug reports, feedback, admiration, abuse, etc, to: $emailto." - echo - exit 1 -fi - -# A bit subtle. The LD_PRELOAD added entry must be absolute -# and not depend on LD_LIBRARY_PATH. This is so that we can -# mess with LD_LIBRARY_PATH for child processes, which makes -# libpthread.so fall out of visibility, independently of -# whether valgrind.so is visible. - -VG_ARGS="$VALGRIND_OPTS $vgsupp $vgopts" -export VG_ARGS -LD_LIBRARY_PATH=$VALGRIND:$LD_LIBRARY_PATH -export LD_LIBRARY_PATH -LD_PRELOAD=$VALGRIND/valgrind.so:$LD_PRELOAD -export LD_PRELOAD -#LD_DEBUG=files -#LD_DEBUG=symbols -#export LD_DEBUG -exec "$@" diff --git a/coregrind/vg_clientfuncs.c b/coregrind/vg_clientfuncs.c deleted file mode 100644 index 80bdae6714..0000000000 --- a/coregrind/vg_clientfuncs.c +++ /dev/null @@ -1,574 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Code which runs on the simulated CPU. 
---*/ -/*--- vg_clientfuncs.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" - -#include "valgrind.h" /* for VALGRIND_MAGIC_SEQUENCE */ - - -/* --------------------------------------------------------------------- - All the code in this file runs on the SIMULATED CPU. It is - intended for various reasons as drop-in replacements for libc - functions. These functions have global visibility (obviously) and - have no prototypes in vg_include.h, since they are not intended to - be called from within Valgrind. - ------------------------------------------------------------------ */ - -/* --------------------------------------------------------------------- - Intercepts for the GNU malloc interface. 
- ------------------------------------------------------------------ */ - -#define SIMPLE_REQUEST1(_qyy_request, _qyy_arg1) \ - ({unsigned int _qyy_res; \ - VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */, \ - _qyy_request, \ - _qyy_arg1, 0, 0, 0); \ - _qyy_res; \ - }) - -#define SIMPLE_REQUEST2(_qyy_request, _qyy_arg1, _qyy_arg2) \ - ({unsigned int _qyy_res; \ - VALGRIND_MAGIC_SEQUENCE(_qyy_res, 0 /* default return */, \ - _qyy_request, \ - _qyy_arg1, _qyy_arg2, 0, 0); \ - _qyy_res; \ - }) - - -/* Below are new versions of malloc, __builtin_new, free, - __builtin_delete, calloc and realloc. - - malloc, __builtin_new, free, __builtin_delete, calloc and realloc - can be entered either on the real CPU or the simulated one. If on - the real one, this is because the dynamic linker is running the - static initialisers for C++, before starting up Valgrind itself. - In this case it is safe to route calls through to - VG_(malloc)/vg_free, since that is self-initialising. - - Once Valgrind is initialised, vg_running_on_simd_CPU becomes True. - The call needs to be transferred from the simulated CPU back to the - real one and routed to the vg_client_* functions. To do that, the - client-request mechanism (in valgrind.h) is used to convey requests - to the scheduler. -*/ - -/* ALL calls to malloc wind up here. 
*/ -void* malloc ( Int n ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("malloc[simd=%d](%d)", - (UInt)VG_(running_on_simd_CPU), n ); - if (n < 0) { - v = NULL; - VG_(message)(Vg_UserMsg, - "Warning: silly arg (%d) to malloc()", n ); - } else { - if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } - - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST1(VG_USERREQ__MALLOC, n); - } else { - v = VG_(malloc)(VG_AR_CLIENT, n); - } - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return (void*)v; -} - - -void* __builtin_new ( Int n ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("__builtin_new[simd=%d](%d)", - (UInt)VG_(running_on_simd_CPU), n ); - if (n < 0) { - v = NULL; - VG_(message)(Vg_UserMsg, - "Warning: silly arg (%d) to __builtin_new()", n ); - } else { - if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } - - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_NEW, n); - } else { - v = VG_(malloc)(VG_AR_CLIENT, n); - } - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return v; -} - - -void* __builtin_vec_new ( Int n ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("__builtin_vec_new[simd=%d](%d)", - (UInt)VG_(running_on_simd_CPU), n ); - if (n < 0) { - v = NULL; - VG_(message)(Vg_UserMsg, - "Warning: silly arg (%d) to __builtin_vec_new()", n ); - } else { - if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } - - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_NEW, n); - } else { - v = VG_(malloc)(VG_AR_CLIENT, n); - } - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return v; -} - - -void free ( void* p ) -{ - if (VG_(clo_trace_malloc)) - VG_(printf)("free[simd=%d](%p)\n", - (UInt)VG_(running_on_simd_CPU), p ); - if (p == NULL) - return; - if (VG_(running_on_simd_CPU)) { - (void)SIMPLE_REQUEST1(VG_USERREQ__FREE, p); - } else { - VG_(free)(VG_AR_CLIENT, p); - } -} - - -void 
__builtin_delete ( void* p ) -{ - if (VG_(clo_trace_malloc)) - VG_(printf)("__builtin_delete[simd=%d](%p)\n", - (UInt)VG_(running_on_simd_CPU), p ); - if (p == NULL) - return; - if (VG_(running_on_simd_CPU)) { - (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_DELETE, p); - } else { - VG_(free)(VG_AR_CLIENT, p); - } -} - - -void __builtin_vec_delete ( void* p ) -{ - if (VG_(clo_trace_malloc)) - VG_(printf)("__builtin_vec_delete[simd=%d](%p)\n", - (UInt)VG_(running_on_simd_CPU), p ); - if (p == NULL) - return; - if (VG_(running_on_simd_CPU)) { - (void)SIMPLE_REQUEST1(VG_USERREQ__BUILTIN_VEC_DELETE, p); - } else { - VG_(free)(VG_AR_CLIENT, p); - } -} - - -void* calloc ( Int nmemb, Int size ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("calloc[simd=%d](%d,%d)", - (UInt)VG_(running_on_simd_CPU), nmemb, size ); - if (nmemb < 0 || size < 0) { - v = NULL; - VG_(message)(Vg_UserMsg, "Warning: silly args (%d,%d) to calloc()", - nmemb, size ); - } else { - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST2(VG_USERREQ__CALLOC, nmemb, size); - } else { - v = VG_(calloc)(VG_AR_CLIENT, nmemb, size); - } - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return v; -} - - -void* realloc ( void* ptrV, Int new_size ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("realloc[simd=%d](%p,%d)", - (UInt)VG_(running_on_simd_CPU), ptrV, new_size ); - - if (VG_(clo_sloppy_malloc)) - { while ((new_size % 4) > 0) new_size++; } - - if (ptrV == NULL) - return malloc(new_size); - if (new_size <= 0) { - free(ptrV); - if (VG_(clo_trace_malloc)) - VG_(printf)(" = 0\n" ); - return NULL; - } - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST2(VG_USERREQ__REALLOC, ptrV, new_size); - } else { - v = VG_(realloc)(VG_AR_CLIENT, ptrV, new_size); - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return v; -} - - -void* memalign ( Int alignment, Int n ) -{ - void* v; - - if (VG_(clo_trace_malloc)) - VG_(printf)("memalign[simd=%d](al %d, 
size %d)", - (UInt)VG_(running_on_simd_CPU), alignment, n ); - if (n < 0) { - v = NULL; - } else { - if (VG_(clo_sloppy_malloc)) { while ((n % 4) > 0) n++; } - - if (VG_(running_on_simd_CPU)) { - v = (void*)SIMPLE_REQUEST2(VG_USERREQ__MEMALIGN, alignment, n); - } else { - v = VG_(malloc_aligned)(VG_AR_CLIENT, alignment, n); - } - } - if (VG_(clo_trace_malloc)) - VG_(printf)(" = %p\n", v ); - return (void*)v; -} - - -void* valloc ( Int size ) -{ - return memalign(VKI_BYTES_PER_PAGE, size); -} - - -/* Various compatibility wrapper functions, for glibc and libstdc++. */ -void cfree ( void* p ) -{ - free ( p ); -} - - -int mallopt ( int cmd, int value ) -{ - /* In glibc-2.2.4, 1 denotes a successful return value for mallopt */ - return 1; -} - - -int __posix_memalign ( void **memptr, UInt alignment, UInt size ) -{ - void *mem; - - /* Test whether the SIZE argument is valid. It must be a power of - two multiple of sizeof (void *). */ - if (size % sizeof (void *) != 0 || (size & (size - 1)) != 0) - return VKI_EINVAL /*22*/ /*EINVAL*/; - - mem = memalign (alignment, size); - - if (mem != NULL) { - *memptr = mem; - return 0; - } - - return VKI_ENOMEM /*12*/ /*ENOMEM*/; -} - - -/* Bomb out if we get any of these. */ -/* HACK: We shouldn't call VG_(panic) or VG_(message) on the simulated - CPU. Really we should pass the request in the usual way, and - Valgrind itself can do the panic. Too tedious, however. -*/ -void pvalloc ( void ) -{ VG_(panic)("call to pvalloc\n"); } -void malloc_stats ( void ) -{ VG_(panic)("call to malloc_stats\n"); } -void malloc_usable_size ( void ) -{ VG_(panic)("call to malloc_usable_size\n"); } -void malloc_trim ( void ) -{ VG_(panic)("call to malloc_trim\n"); } -void malloc_get_state ( void ) -{ VG_(panic)("call to malloc_get_state\n"); } -void malloc_set_state ( void ) -{ VG_(panic)("call to malloc_set_state\n"); } - - -/* Yet another ugly hack. Cannot include because we - implement functions implemented there with different signatures. 
- This struct definition MUST match the system one. */ - -/* SVID2/XPG mallinfo structure */ -struct mallinfo { - int arena; /* total space allocated from system */ - int ordblks; /* number of non-inuse chunks */ - int smblks; /* unused -- always zero */ - int hblks; /* number of mmapped regions */ - int hblkhd; /* total space in mmapped regions */ - int usmblks; /* unused -- always zero */ - int fsmblks; /* unused -- always zero */ - int uordblks; /* total allocated space */ - int fordblks; /* total non-inuse space */ - int keepcost; /* top-most, releasable (via malloc_trim) space */ -}; - -struct mallinfo mallinfo ( void ) -{ - /* Should really try to return something a bit more meaningful */ - Int i; - struct mallinfo mi; - UChar* pmi = (UChar*)(&mi); - for (i = 0; i < sizeof(mi); i++) - pmi[i] = 0; - return mi; -} - - -/* --------------------------------------------------------------------- - Replace some C lib things with equivs which don't get - spurious value warnings. THEY RUN ON SIMD CPU! 
- ------------------------------------------------------------------ */ - -char* strrchr ( const char* s, int c ) -{ - UChar ch = (UChar)((UInt)c); - UChar* p = (UChar*)s; - UChar* last = NULL; - while (True) { - if (*p == ch) last = p; - if (*p == 0) return last; - p++; - } -} - -char* strchr ( const char* s, int c ) -{ - UChar ch = (UChar)((UInt)c); - UChar* p = (UChar*)s; - while (True) { - if (*p == ch) return p; - if (*p == 0) return NULL; - p++; - } -} - -char* strcat ( char* dest, const char* src ) -{ - Char* dest_orig = dest; - while (*dest) dest++; - while (*src) *dest++ = *src++; - *dest = 0; - return dest_orig; -} - -unsigned int strlen ( const char* str ) -{ - UInt i = 0; - while (str[i] != 0) i++; - return i; -} - -char* strcpy ( char* dest, const char* src ) -{ - Char* dest_orig = dest; - while (*src) *dest++ = *src++; - *dest = 0; - return dest_orig; -} - -int strncmp ( const char* s1, const char* s2, unsigned int nmax ) -{ - unsigned int n = 0; - while (True) { - if (n >= nmax) return 0; - if (*s1 == 0 && *s2 == 0) return 0; - if (*s1 == 0) return -1; - if (*s2 == 0) return 1; - - if (*(UChar*)s1 < *(UChar*)s2) return -1; - if (*(UChar*)s1 > *(UChar*)s2) return 1; - - s1++; s2++; n++; - } -} - -int strcmp ( const char* s1, const char* s2 ) -{ - register char c1, c2; - while (True) { - c1 = *s1; - c2 = *s2; - if (c1 != c2) break; - if (c1 == 0) break; - s1++; s2++; - } - if (c1 < c2) return -1; - if (c1 > c2) return 1; - return 0; -} - -void* memchr(const void *s, int c, unsigned int n) -{ - unsigned int i; - UChar c0 = (UChar)c; - UChar* p = (UChar*)s; - for (i = 0; i < n; i++) - if (p[i] == c0) return (void*)(&p[i]); - return NULL; -} - -void* memcpy( void *dst, const void *src, unsigned int len ) -{ - register char *d; - register char *s; - if ( dst > src ) { - d = (char *)dst + len - 1; - s = (char *)src + len - 1; - while ( len >= 4 ) { - *d-- = *s--; - *d-- = *s--; - *d-- = *s--; - *d-- = *s--; - len -= 4; - } - while ( len-- ) { - *d-- = *s--; 
- } - } else if ( dst < src ) { - d = (char *)dst; - s = (char *)src; - while ( len >= 4 ) { - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - *d++ = *s++; - len -= 4; - } - while ( len-- ) { - *d++ = *s++; - } - } - return dst; -} - - -/* --------------------------------------------------------------------- - Horrible hack to make sigsuspend() sort-of work OK. Same trick as - for pause() in vg_libpthread.so. - ------------------------------------------------------------------ */ - -/* Horrible because - - -- uses VG_(ksigprocmask), VG_(nanosleep) and vg_assert, which are - valgrind-native (not intended for client use). - - -- This is here so single-threaded progs (not linking libpthread.so) - can see it. But pause() should also be here. ??? -*/ - -/* Either libc supplies this (weak) or our libpthread.so supplies it - (strong) in a threaded setting. -*/ -extern int* __errno_location ( void ); - - -int sigsuspend ( /* const sigset_t * */ void* mask) -{ - unsigned int n_orig, n_now; - struct vki_timespec nanosleep_interval; - - VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */, - VG_USERREQ__GET_N_SIGS_RETURNED, - 0, 0, 0, 0); - vg_assert(n_orig != 0xFFFFFFFF); - - VG_(ksigprocmask)(VKI_SIG_SETMASK, mask, NULL); - - while (1) { - VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */, - VG_USERREQ__GET_N_SIGS_RETURNED, - 0, 0, 0, 0); - vg_assert(n_now != 0xFFFFFFFF); - vg_assert(n_now >= n_orig); - if (n_now != n_orig) break; - - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 53 * 1000 * 1000; /* 53 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. */ - VG_(nanosleep)( &nanosleep_interval, NULL); - } - - /* Maybe this is OK both in single and multithreaded setting. */ - * (__errno_location()) = -VKI_EINTR; /* == EINTR; */ - return -1; -} - - -/* --------------------------------------------------------------------- - Hook for running __libc_freeres once the program exits. 
- ------------------------------------------------------------------ */ - -void VG_(__libc_freeres_wrapper)( void ) -{ - int res; - extern void __libc_freeres(void); - __libc_freeres(); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__LIBC_FREERES_DONE, 0, 0, 0, 0); - /*NOTREACHED*/ - vg_assert(12345+54321 == 999999); -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_clientfuncs.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_clientmalloc.c b/coregrind/vg_clientmalloc.c deleted file mode 100644 index 0292aa404d..0000000000 --- a/coregrind/vg_clientmalloc.c +++ /dev/null @@ -1,579 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- An implementation of malloc/free for the client. ---*/ -/*--- vg_clientmalloc.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - -#include "vg_include.h" - - -/*------------------------------------------------------------*/ -/*--- Defns ---*/ -/*------------------------------------------------------------*/ - -/* #define DEBUG_CLIENTMALLOC */ - -/* Holds malloc'd but not freed blocks. */ -#define VG_MALLOCLIST_NO(aa) (((UInt)(aa)) % VG_N_MALLOCLISTS) -static ShadowChunk* vg_malloclist[VG_N_MALLOCLISTS]; -static Bool vg_client_malloc_init_done = False; - -/* Holds blocks after freeing. */ -static ShadowChunk* vg_freed_list_start = NULL; -static ShadowChunk* vg_freed_list_end = NULL; -static Int vg_freed_list_volume = 0; - -/* Stats ... */ -static UInt vg_cmalloc_n_mallocs = 0; -static UInt vg_cmalloc_n_frees = 0; -static UInt vg_cmalloc_bs_mallocd = 0; - -static UInt vg_mlist_frees = 0; -static UInt vg_mlist_tries = 0; - - -/*------------------------------------------------------------*/ -/*--- Fns ---*/ -/*------------------------------------------------------------*/ - -/* Allocate a suitably-sized array, copy all the malloc-d block - shadows into it, and return both the array and the size of it. - This is used by the memory-leak detector. 
-*/ -ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ) -{ - UInt i, scn; - ShadowChunk** arr; - ShadowChunk* sc; - *n_shadows = 0; - for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { - for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { - (*n_shadows)++; - } - } - if (*n_shadows == 0) return NULL; - - arr = VG_(malloc)( VG_AR_PRIVATE, - *n_shadows * sizeof(ShadowChunk*) ); - - i = 0; - for (scn = 0; scn < VG_N_MALLOCLISTS; scn++) { - for (sc = vg_malloclist[scn]; sc != NULL; sc = sc->next) { - arr[i++] = sc; - } - } - vg_assert(i == *n_shadows); - return arr; -} - -static void client_malloc_init ( void ) -{ - UInt ml_no; - if (vg_client_malloc_init_done) return; - for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) - vg_malloclist[ml_no] = NULL; - vg_client_malloc_init_done = True; -} - - -static __attribute__ ((unused)) - Int count_freelist ( void ) -{ - ShadowChunk* sc; - Int n = 0; - for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) - n++; - return n; -} - -static __attribute__ ((unused)) - Int count_malloclists ( void ) -{ - ShadowChunk* sc; - UInt ml_no; - Int n = 0; - for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) - for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) - n++; - return n; -} - -static __attribute__ ((unused)) - void freelist_sanity ( void ) -{ - ShadowChunk* sc; - Int n = 0; - /* VG_(printf)("freelist sanity\n"); */ - for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) - n += sc->size; - vg_assert(n == vg_freed_list_volume); -} - -/* Remove sc from malloc list # sc. It is an unchecked error for - sc not to be present in the list. 
-*/ -static void remove_from_malloclist ( UInt ml_no, ShadowChunk* sc ) -{ - ShadowChunk *sc1, *sc2; - if (sc == vg_malloclist[ml_no]) { - vg_malloclist[ml_no] = vg_malloclist[ml_no]->next; - } else { - sc1 = vg_malloclist[ml_no]; - vg_assert(sc1 != NULL); - sc2 = sc1->next; - while (sc2 != sc) { - vg_assert(sc2 != NULL); - sc1 = sc2; - sc2 = sc2->next; - } - vg_assert(sc1->next == sc); - vg_assert(sc2 == sc); - sc1->next = sc2->next; - } -} - - -/* Put a shadow chunk on the freed blocks queue, possibly freeing up - some of the oldest blocks in the queue at the same time. */ - -static void add_to_freed_queue ( ShadowChunk* sc ) -{ - ShadowChunk* sc1; - - /* Put it at the end of the freed list */ - if (vg_freed_list_end == NULL) { - vg_assert(vg_freed_list_start == NULL); - vg_freed_list_end = vg_freed_list_start = sc; - vg_freed_list_volume = sc->size; - } else { - vg_assert(vg_freed_list_end->next == NULL); - vg_freed_list_end->next = sc; - vg_freed_list_end = sc; - vg_freed_list_volume += sc->size; - } - sc->next = NULL; - - /* Release enough of the oldest blocks to bring the free queue - volume below vg_clo_freelist_vol. */ - - while (vg_freed_list_volume > VG_(clo_freelist_vol)) { - /* freelist_sanity(); */ - vg_assert(vg_freed_list_start != NULL); - vg_assert(vg_freed_list_end != NULL); - - sc1 = vg_freed_list_start; - vg_freed_list_volume -= sc1->size; - /* VG_(printf)("volume now %d\n", vg_freed_list_volume); */ - vg_assert(vg_freed_list_volume >= 0); - - if (vg_freed_list_start == vg_freed_list_end) { - vg_freed_list_start = vg_freed_list_end = NULL; - } else { - vg_freed_list_start = sc1->next; - } - sc1->next = NULL; /* just paranoia */ - VG_(free)(VG_AR_CLIENT, (void*)(sc1->data)); - VG_(free)(VG_AR_PRIVATE, sc1); - } -} - - -/* Allocate a user-chunk of size bytes. Also allocate its shadow - block, make the shadow block point at the user block. Put the - shadow chunk on the appropriate list, and set all memory - protections correctly. 
*/

static ShadowChunk* client_malloc_shadow ( ThreadState* tst,
                                           UInt align, UInt size, 
                                           VgAllocKind kind )
{
   ShadowChunk* shadow;
   Addr         payload;
   UInt         bucket;

#  ifdef DEBUG_CLIENTMALLOC
   VG_(printf)("[m %d, f %d (%d)] client_malloc_shadow ( al %d, sz %d )\n", 
               count_malloclists(), 
               count_freelist(), vg_freed_list_volume,
               align, size );
#  endif

   /* An alignment of exactly 4 takes the plain allocator; anything
      larger goes through the aligned one. */
   vg_assert(align >= 4);
   payload = (align == 4)
                ? (Addr)VG_(malloc)(VG_AR_CLIENT, size)
                : (Addr)VG_(malloc_aligned)(VG_AR_CLIENT, align, size);

   /* Build the shadow record, remembering the allocation context
      taken from the thread's current %eip/%ebp. */
   shadow            = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk));
   shadow->where     = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp);
   shadow->data      = payload;
   shadow->size      = size;
   shadow->allockind = kind;

   /* Push it on the front of the list selected by the payload
      address. */
   bucket                = VG_MALLOCLIST_NO(payload);
   shadow->next          = vg_malloclist[bucket];
   vg_malloclist[bucket] = shadow;

   /* Payload becomes writable; the red zones either side of it are
      made inaccessible. */
   VGM_(make_writable)(payload, size);
   VGM_(make_noaccess)(payload + size, 
                       VG_AR_CLIENT_REDZONE_SZB);
   VGM_(make_noaccess)(payload - VG_AR_CLIENT_REDZONE_SZB, 
                       VG_AR_CLIENT_REDZONE_SZB);

   return shadow;
}


/* Allocate memory, noticing whether or not we are doing the full
   instrumentation thing. 
*/ - -void* VG_(client_malloc) ( ThreadState* tst, UInt size, VgAllocKind kind ) -{ - ShadowChunk* sc; - - VGP_PUSHCC(VgpCliMalloc); - client_malloc_init(); -# ifdef DEBUG_CLIENTMALLOC - VG_(printf)("[m %d, f %d (%d)] client_malloc ( %d, %x )\n", - count_malloclists(), - count_freelist(), vg_freed_list_volume, - size, raw_alloc_kind ); -# endif - - vg_cmalloc_n_mallocs ++; - vg_cmalloc_bs_mallocd += size; - - if (!VG_(clo_instrument)) { - VGP_POPCC; - return VG_(malloc) ( VG_AR_CLIENT, size ); - } - - sc = client_malloc_shadow ( tst, VG_(clo_alignment), size, kind ); - VGP_POPCC; - return (void*)(sc->data); -} - - -void* VG_(client_memalign) ( ThreadState* tst, UInt align, UInt size ) -{ - ShadowChunk* sc; - VGP_PUSHCC(VgpCliMalloc); - client_malloc_init(); -# ifdef DEBUG_CLIENTMALLOC - VG_(printf)("[m %d, f %d (%d)] client_memalign ( al %d, sz %d )\n", - count_malloclists(), - count_freelist(), vg_freed_list_volume, - align, size ); -# endif - - vg_cmalloc_n_mallocs ++; - vg_cmalloc_bs_mallocd += size; - - if (!VG_(clo_instrument)) { - VGP_POPCC; - return VG_(malloc_aligned) ( VG_AR_CLIENT, align, size ); - } - sc = client_malloc_shadow ( tst, align, size, Vg_AllocMalloc ); - VGP_POPCC; - return (void*)(sc->data); -} - - -void VG_(client_free) ( ThreadState* tst, void* ptrV, VgAllocKind kind ) -{ - ShadowChunk* sc; - UInt ml_no; - - VGP_PUSHCC(VgpCliMalloc); - client_malloc_init(); -# ifdef DEBUG_CLIENTMALLOC - VG_(printf)("[m %d, f %d (%d)] client_free ( %p, %x )\n", - count_malloclists(), - count_freelist(), vg_freed_list_volume, - ptrV, raw_alloc_kind ); -# endif - - vg_cmalloc_n_frees ++; - - if (!VG_(clo_instrument)) { - VGP_POPCC; - VG_(free) ( VG_AR_CLIENT, ptrV ); - return; - } - - /* first, see if ptrV is one vg_client_malloc gave out. 
*/ - ml_no = VG_MALLOCLIST_NO(ptrV); - vg_mlist_frees++; - for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { - vg_mlist_tries++; - if ((Addr)ptrV == sc->data) - break; - } - - if (sc == NULL) { - VG_(record_free_error) ( tst, (Addr)ptrV ); - VGP_POPCC; - return; - } - - /* check if its a matching free() / delete / delete [] */ - if (kind != sc->allockind) - VG_(record_freemismatch_error) ( tst, (Addr) ptrV ); - - /* Remove the shadow chunk from the mallocd list. */ - remove_from_malloclist ( ml_no, sc ); - - /* Declare it inaccessible. */ - VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, - sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); - VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); - sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp); - - /* Put it out of harm's way for a while. */ - add_to_freed_queue ( sc ); - VGP_POPCC; -} - - - -void* VG_(client_calloc) ( ThreadState* tst, UInt nmemb, UInt size1 ) -{ - ShadowChunk* sc; - Addr p; - UInt size, i, ml_no; - - VGP_PUSHCC(VgpCliMalloc); - client_malloc_init(); - -# ifdef DEBUG_CLIENTMALLOC - VG_(printf)("[m %d, f %d (%d)] client_calloc ( %d, %d )\n", - count_malloclists(), - count_freelist(), vg_freed_list_volume, - nmemb, size1 ); -# endif - - vg_cmalloc_n_mallocs ++; - vg_cmalloc_bs_mallocd += nmemb * size1; - - if (!VG_(clo_instrument)) { - VGP_POPCC; - return VG_(calloc) ( VG_AR_CLIENT, nmemb, size1 ); - } - - size = nmemb * size1; - p = (Addr)VG_(malloc)(VG_AR_CLIENT, size); - sc = VG_(malloc)(VG_AR_PRIVATE, sizeof(ShadowChunk)); - sc->where = VG_(get_ExeContext)(False, tst->m_eip, tst->m_ebp); - sc->size = size; - sc->allockind = Vg_AllocMalloc; /* its a lie - but true. 
eat this :) */ - sc->data = p; - ml_no = VG_MALLOCLIST_NO(p); - sc->next = vg_malloclist[ml_no]; - vg_malloclist[ml_no] = sc; - - VGM_(make_readable)(p, size); - VGM_(make_noaccess)(p + size, - VG_AR_CLIENT_REDZONE_SZB); - VGM_(make_noaccess)(p - VG_AR_CLIENT_REDZONE_SZB, - VG_AR_CLIENT_REDZONE_SZB); - - for (i = 0; i < size; i++) ((UChar*)p)[i] = 0; - - VGP_POPCC; - return (void*)p; -} - - -void* VG_(client_realloc) ( ThreadState* tst, void* ptrV, UInt size_new ) -{ - ShadowChunk *sc, *sc_new; - UInt i, ml_no; - - VGP_PUSHCC(VgpCliMalloc); - client_malloc_init(); - -# ifdef DEBUG_CLIENTMALLOC - VG_(printf)("[m %d, f %d (%d)] client_realloc ( %p, %d )\n", - count_malloclists(), - count_freelist(), vg_freed_list_volume, - ptrV, size_new ); -# endif - - vg_cmalloc_n_frees ++; - vg_cmalloc_n_mallocs ++; - vg_cmalloc_bs_mallocd += size_new; - - if (!VG_(clo_instrument)) { - vg_assert(ptrV != NULL && size_new != 0); - VGP_POPCC; - return VG_(realloc) ( VG_AR_CLIENT, ptrV, size_new ); - } - - /* First try and find the block. */ - ml_no = VG_MALLOCLIST_NO(ptrV); - for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { - if ((Addr)ptrV == sc->data) - break; - } - - if (sc == NULL) { - VG_(record_free_error) ( tst, (Addr)ptrV ); - /* Perhaps we should keep going regardless. 
*/ - VGP_POPCC; - return NULL; - } - - if (sc->allockind != Vg_AllocMalloc) { - /* can not realloc a range that was allocated with new or new [] */ - VG_(record_freemismatch_error) ( tst, (Addr)ptrV ); - /* but keep going anyway */ - } - - if (sc->size == size_new) { - /* size unchanged */ - VGP_POPCC; - return ptrV; - } - if (sc->size > size_new) { - /* new size is smaller */ - VGM_(make_noaccess)( sc->data + size_new, - sc->size - size_new ); - sc->size = size_new; - VGP_POPCC; - return ptrV; - } else { - /* new size is bigger */ - sc_new = client_malloc_shadow ( tst, VG_(clo_alignment), - size_new, Vg_AllocMalloc ); - for (i = 0; i < sc->size; i++) - ((UChar*)(sc_new->data))[i] = ((UChar*)(sc->data))[i]; - VGM_(copy_address_range_perms) ( - sc->data, sc_new->data, sc->size ); - remove_from_malloclist ( VG_MALLOCLIST_NO(sc->data), sc ); - VGM_(make_noaccess) ( sc->data - VG_AR_CLIENT_REDZONE_SZB, - sc->size + 2*VG_AR_CLIENT_REDZONE_SZB ); - VGM_(make_noaccess) ( (Addr)sc, sizeof(ShadowChunk) ); - add_to_freed_queue ( sc ); - VGP_POPCC; - return (void*)sc_new->data; - } -} - - -void VG_(clientmalloc_done) ( void ) -{ - UInt nblocks, nbytes, ml_no; - ShadowChunk* sc; - - client_malloc_init(); - - nblocks = nbytes = 0; - - for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { - for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { - nblocks ++; - nbytes += sc->size; - } - } - - if (VG_(clo_verbosity) == 0) - return; - - VG_(message)(Vg_UserMsg, - "malloc/free: in use at exit: %d bytes in %d blocks.", - nbytes, nblocks); - VG_(message)(Vg_UserMsg, - "malloc/free: %d allocs, %d frees, %d bytes allocated.", - vg_cmalloc_n_mallocs, - vg_cmalloc_n_frees, vg_cmalloc_bs_mallocd); - if (!VG_(clo_leak_check)) - VG_(message)(Vg_UserMsg, - "For a detailed leak analysis, rerun with: --leak-check=yes"); - if (0) - VG_(message)(Vg_DebugMsg, - "free search: %d tries, %d frees", - vg_mlist_tries, - vg_mlist_frees ); - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg, 
""); -} - - -/* Describe an address as best you can, for error messages, - putting the result in ai. */ - -void VG_(describe_addr) ( Addr a, AddrInfo* ai ) -{ - ShadowChunk* sc; - UInt ml_no; - Bool ok; - ThreadId tid; - - /* Perhaps it's a user-def'd block ? */ - ok = VG_(client_perm_maybe_describe)( a, ai ); - if (ok) - return; - /* Perhaps it's on a thread's stack? */ - tid = VG_(identify_stack_addr)(a); - if (tid != VG_INVALID_THREADID) { - ai->akind = Stack; - ai->stack_tid = tid; - return; - } - /* Search for a freed block which might bracket it. */ - for (sc = vg_freed_list_start; sc != NULL; sc = sc->next) { - if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a - && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { - ai->akind = Freed; - ai->blksize = sc->size; - ai->rwoffset = (Int)(a) - (Int)(sc->data); - ai->lastchange = sc->where; - return; - } - } - /* Search for a mallocd block which might bracket it. */ - for (ml_no = 0; ml_no < VG_N_MALLOCLISTS; ml_no++) { - for (sc = vg_malloclist[ml_no]; sc != NULL; sc = sc->next) { - if (sc->data - VG_AR_CLIENT_REDZONE_SZB <= a - && a < sc->data + sc->size + VG_AR_CLIENT_REDZONE_SZB) { - ai->akind = Mallocd; - ai->blksize = sc->size; - ai->rwoffset = (Int)(a) - (Int)(sc->data); - ai->lastchange = sc->where; - return; - } - } - } - /* Clueless ... */ - ai->akind = Unknown; - return; -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_clientmalloc.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_constants.h b/coregrind/vg_constants.h deleted file mode 100644 index d3da14b1a9..0000000000 --- a/coregrind/vg_constants.h +++ /dev/null @@ -1,100 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A header file containing constants (for assembly code). 
---*/ -/*--- vg_constants.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#ifndef __VG_CONSTANTS_H -#define __VG_CONSTANTS_H - - -/* This file is included in all Valgrind source files, including - assembly ones. */ - -/* All symbols externally visible from valgrind.so are prefixed - as specified here. The prefix can be changed, so as to avoid - namespace conflict problems. -*/ -#define VGAPPEND(str1,str2) str1##str2 - -/* These macros should add different prefixes so the same base - name can safely be used across different macros. */ -#define VG_(str) VGAPPEND(vgPlain_,str) -#define VGM_(str) VGAPPEND(vgMem_,str) -#define VGP_(str) VGAPPEND(vgProf_,str) -#define VGOFF_(str) VGAPPEND(vgOff_,str) - - -/* Magic values that %ebp might be set to when returning to the - dispatcher. The only other legitimate value is to point to the - start of VG_(baseBlock). These also are return values from - VG_(run_innerloop) to the scheduler. 
- - EBP means %ebp can legitimately have this value when a basic block - returns to the dispatch loop. TRC means that this value is a valid - thread return code, which the dispatch loop may return to the - scheduler. */ -#define VG_TRC_EBP_JMP_STKADJ 17 /* EBP only; handled by dispatcher */ -#define VG_TRC_EBP_JMP_SYSCALL 19 /* EBP and TRC */ -#define VG_TRC_EBP_JMP_CLIENTREQ 23 /* EBP and TRC */ - -#define VG_TRC_INNER_COUNTERZERO 29 /* TRC only; means bb ctr == 0 */ -#define VG_TRC_INNER_FASTMISS 31 /* TRC only; means fast-cache miss. */ -#define VG_TRC_UNRESUMABLE_SIGNAL 37 /* TRC only; got sigsegv/sigbus */ - - -/* Debugging hack for assembly code ... sigh. */ -#if 0 -#define OYNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal -#else -#define OYNK(nnn) -#endif - -#if 0 -#define OYNNK(nnn) pushal; pushl $nnn; call VG_(oynk) ; addl $4,%esp; popal -#else -#define OYNNK(nnn) -#endif - - -/* Constants for the fast translation lookup cache. */ -#define VG_TT_FAST_BITS 15 -#define VG_TT_FAST_SIZE (1 << VG_TT_FAST_BITS) -#define VG_TT_FAST_MASK ((VG_TT_FAST_SIZE) - 1) - -/* Constants for the fast original-code-write check cache. */ - - -/* Assembly code stubs make this request */ -#define VG_USERREQ__SIGNAL_RETURNS 0x4001 - -#endif /* ndef __VG_INCLUDE_H */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_constants.h ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_demangle.c b/coregrind/vg_demangle.c deleted file mode 100644 index f07f7f3465..0000000000 --- a/coregrind/vg_demangle.c +++ /dev/null @@ -1,73 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Demangling of C++ mangled names. 
---*/ -/*--- vg_demangle.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "demangle.h" - -#define ADD_TO_RESULT(zzstr,zzn) \ -{ \ - Char* zz = (zzstr); \ - Int nn = (zzn); \ - Int ii; \ - for (ii = 0; ii < nn; ii++) { \ - result[n_result] = zz[ii]; \ - if (n_result < result_size-1) n_result++; \ - result[n_result] = 0; \ - } \ -} - -void VG_(demangle) ( Char* orig, Char* result, Int result_size ) -{ - Int n_result = 0; - Char* demangled = NULL; - - if (VG_(clo_demangle)) - demangled = VG_(cplus_demangle) ( orig, DMGL_ANSI | DMGL_PARAMS ); - - if (demangled) { - ADD_TO_RESULT(demangled, VG_(strlen)(demangled)); - VG_(free) (VG_AR_DEMANGLE, demangled); - } else { - ADD_TO_RESULT(orig, VG_(strlen)(orig)); - } - - /* Check that the demangler isn't leaking. */ - /* 15 Feb 02: if this assertion fails, this is not a disaster. - Comment it out, and let me know. (jseward@acm.org). 
*/ - vg_assert(VG_(is_empty_arena)(VG_AR_DEMANGLE)); - - /* VG_(show_all_arena_stats)(); */ -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_demangle.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_dispatch.S b/coregrind/vg_dispatch.S deleted file mode 100644 index bd1c5b959f..0000000000 --- a/coregrind/vg_dispatch.S +++ /dev/null @@ -1,212 +0,0 @@ - -##--------------------------------------------------------------------## -##--- The core dispatch loop, for jumping to a code address. ---## -##--- vg_dispatch.S ---## -##--------------------------------------------------------------------## - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_constants.h" - - -/*------------------------------------------------------------*/ -/*--- The normal-case dispatch machinery. ---*/ -/*------------------------------------------------------------*/ - -/* To transfer to an (original) code address, load it into %eax and - jump to vg_dispatch. 
This fragment of code tries to find the - address of the corresponding translation by searching the translation - table. If it fails, a new translation is made, added to the - translation table, and then jumped to. Almost all the hard - work is done by C routines; this code simply handles the - common case fast -- when the translation address is found in - the translation cache. - - At entry, %eax is the only live (real-machine) register; the - entire simulated state is tidily saved in vg_m_state. -*/ - - -/* The C world needs a way to get started simulating. So we provide - a function void vg_run_innerloop ( void ), which starts running - from vg_m_eip, and exits when the counter reaches zero. This loop - can also exit if vg_oursignalhandler() catches a non-resumable - signal, for example SIGSEGV. It then longjmp()s back past here. -*/ - -.globl VG_(run_innerloop) -VG_(run_innerloop): - #OYNK(1000) - - # ----- entry point to VG_(run_innerloop) ----- - pushl %ebx - pushl %ecx - pushl %edx - pushl %esi - pushl %edi - pushl %ebp - - # Set up the baseBlock pointer - movl $VG_(baseBlock), %ebp - - # fetch m_eip into %eax - movl VGOFF_(m_eip), %esi - movl (%ebp, %esi, 4), %eax - - # Start off dispatching paranoically, since we no longer have - # any indication whether or not this might be a special call/ret - # transfer. - jmp dispatch_stkadj - - -dispatch_main: - # Jump here to do a new dispatch. - # %eax holds destination (original) address. - # %ebp indicates further details of the control transfer - # requested to the address in %eax. - # - # If ebp == & VG_(baseBlock), just jump next to %eax. - # - # If ebp == VG_EBP_JMP_SYSCALL, do a system call before - # continuing at eax. - # - # If ebp == VG_EBP_JMP_CLIENTREQ, do a client request before - # continuing at eax. - # - # If %ebp has any other value, we panic. 
- - cmpl $VG_(baseBlock), %ebp - jnz dispatch_exceptional - -dispatch_boring: - # save the jump address at VG_(baseBlock)[VGOFF_(m_eip)], - movl VGOFF_(m_eip), %esi - movl %eax, (%ebp, %esi, 4) - - # do a timeslice check. - # are we out of timeslice? If yes, defer to scheduler. - #OYNK(1001) - decl VG_(dispatch_ctr) - jz counter_is_zero - - #OYNK(1002) - # try a fast lookup in the translation cache - movl %eax, %ebx - andl $VG_TT_FAST_MASK, %ebx - # ebx = tt_fast index - movl VG_(tt_fast)(,%ebx,4), %ebx - # ebx points at a tt entry - # now compare target with the tte.orig_addr field (+0) - cmpl %eax, (%ebx) - jnz fast_lookup_failed - - # Found a match. Set the tte.mru_epoch field (+8) - # and call the tte.trans_addr field (+4) - movl VG_(current_epoch), %ecx - movl %ecx, 8(%ebx) - call *4(%ebx) - jmp dispatch_main - -fast_lookup_failed: - # %EIP is up to date here since dispatch_boring dominates - movl $VG_TRC_INNER_FASTMISS, %eax - jmp run_innerloop_exit - -counter_is_zero: - # %EIP is up to date here since dispatch_boring dominates - movl $VG_TRC_INNER_COUNTERZERO, %eax - jmp run_innerloop_exit - -run_innerloop_exit: - popl %ebp - popl %edi - popl %esi - popl %edx - popl %ecx - popl %ebx - ret - - - -/* Other ways of getting out of the inner loop. Placed out-of-line to - make it look cleaner. -*/ -dispatch_exceptional: - # this is jumped to only, not fallen-through from above - cmpl $VG_TRC_EBP_JMP_STKADJ, %ebp - jz dispatch_stkadj - cmpl $VG_TRC_EBP_JMP_SYSCALL, %ebp - jz dispatch_syscall - cmpl $VG_TRC_EBP_JMP_CLIENTREQ, %ebp - jz dispatch_clientreq - - # ebp has an invalid value ... crap out. 
- pushl $panic_msg_ebp - call VG_(panic) - # (never returns) - -dispatch_syscall: - # save %eax in %EIP and defer to sched - movl $VG_(baseBlock), %ebp - movl VGOFF_(m_eip), %esi - movl %eax, (%ebp, %esi, 4) - movl $VG_TRC_EBP_JMP_SYSCALL, %eax - jmp run_innerloop_exit - -dispatch_clientreq: - # save %eax in %EIP and defer to sched - movl $VG_(baseBlock), %ebp - movl VGOFF_(m_eip), %esi - movl %eax, (%ebp, %esi, 4) - movl $VG_TRC_EBP_JMP_CLIENTREQ, %eax - jmp run_innerloop_exit - -dispatch_stkadj: - # save %eax in %EIP - movl $VG_(baseBlock), %ebp - movl VGOFF_(m_eip), %esi - movl %eax, (%ebp, %esi, 4) - - # see if we need to mess with stack blocks - pushl %eax - call VG_(delete_client_stack_blocks_following_ESP_change) - popl %eax - movl $VG_(baseBlock), %ebp - - # ok, its not interesting. Handle the normal way. - jmp dispatch_boring - - -.data -panic_msg_ebp: -.ascii "vg_dispatch: %ebp has invalid value!" -.byte 0 -.text - - -##--------------------------------------------------------------------## -##--- end vg_dispatch.S ---## -##--------------------------------------------------------------------## diff --git a/coregrind/vg_errcontext.c b/coregrind/vg_errcontext.c deleted file mode 100644 index 46838b603f..0000000000 --- a/coregrind/vg_errcontext.c +++ /dev/null @@ -1,1234 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Management of error messages. vg_errcontext.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" - - -/*------------------------------------------------------------*/ -/*--- Defns ---*/ -/*------------------------------------------------------------*/ - -/* Suppression is a type describing an error which we want to - suppress, ie, not show the user, usually because it is caused by a - problem in a library which we can't fix, replace or work around. - Suppressions are read from a file at startup time, specified by - vg_clo_suppressions, and placed in the vg_suppressions list. This - gives flexibility so that new suppressions can be added to the file - as and when needed. -*/ -typedef - enum { - /* Bad syscall params */ - Param, - /* Use of invalid values of given size */ - Value0, Value1, Value2, Value4, Value8, - /* Invalid read/write attempt at given size */ - Addr1, Addr2, Addr4, Addr8, - /* Invalid or mismatching free */ - FreeS, - /* Pthreading error */ - PThread - } - SuppressionKind; - - -/* For each caller specified for a suppression, record the nature of - the caller name. */ -typedef - enum { - /* Name is of an shared object file. */ - ObjName, - /* Name is of a function. */ - FunName - } - SuppressionLocTy; - - -/* A complete suppression record. */ -typedef - struct _Suppression { - struct _Suppression* next; - /* The number of times this error has been suppressed. */ - Int count; - /* The name by which the suppression is referred to. 
*/ - Char* sname; - /* What kind of suppression. */ - SuppressionKind skind; - /* Name of syscall param if skind==Param */ - Char* param; - /* Name of fn where err occurs, and immediate caller (mandatory). */ - SuppressionLocTy caller0_ty; - Char* caller0; - SuppressionLocTy caller1_ty; - Char* caller1; - /* Optional extra callers. */ - SuppressionLocTy caller2_ty; - Char* caller2; - SuppressionLocTy caller3_ty; - Char* caller3; - } - Suppression; - - -/* ErrContext is a type for recording just enough info to generate an - error report for an illegal memory access. The idea is that - (typically) the same few points in the program generate thousands - of illegal accesses, and we don't want to spew out a fresh error - message for each one. Instead, we use these structures to common - up duplicates. -*/ - -/* What kind of error it is. */ -typedef - enum { ValueErr, AddrErr, - ParamErr, UserErr, /* behaves like an anonymous ParamErr */ - FreeErr, FreeMismatchErr, - PThreadErr /* pthread API error */ - } - ErrKind; - -/* What kind of memory access is involved in the error? */ -typedef - enum { ReadAxs, WriteAxs, ExecAxs } - AxsKind; - -/* Top-level struct for recording errors. */ -typedef - struct _ErrContext { - /* ALL */ - struct _ErrContext* next; - /* ALL */ - /* NULL if unsuppressed; or ptr to suppression record. */ - Suppression* supp; - /* ALL */ - Int count; - /* ALL */ - ErrKind ekind; - /* ALL */ - ExeContext* where; - /* Addr */ - AxsKind axskind; - /* Addr, Value */ - Int size; - /* Addr, Free, Param, User */ - Addr addr; - /* Addr, Free, Param, User */ - AddrInfo addrinfo; - /* Param; hijacked for PThread as a description */ - Char* syscall_param; - /* Param, User */ - Bool isWriteableLack; - /* ALL */ - ThreadId tid; - /* ALL */ - /* These record %EIP, %ESP and %EBP at the error point. They - are only used to make GDB-attaching convenient; there is no - other purpose; specifically they are not used to do - comparisons between errors. 
*/ - UInt m_eip; - UInt m_esp; - UInt m_ebp; - } - ErrContext; - -/* The list of error contexts found, both suppressed and unsuppressed. - Initially empty, and grows as errors are detected. */ -static ErrContext* vg_err_contexts = NULL; - -/* The list of suppression directives, as read from the specified - suppressions file. */ -static Suppression* vg_suppressions = NULL; - -/* Running count of unsuppressed errors detected. */ -static UInt vg_n_errs_found = 0; - -/* Running count of suppressed errors detected. */ -static UInt vg_n_errs_suppressed = 0; - -/* Used to disable further error reporting once some huge number of - errors have already been logged. */ -static Bool vg_ignore_errors = False; - -/* forwards ... */ -static Suppression* is_suppressible_error ( ErrContext* ec ); - - -/*------------------------------------------------------------*/ -/*--- Helper fns ---*/ -/*------------------------------------------------------------*/ - - -static void clear_AddrInfo ( AddrInfo* ai ) -{ - ai->akind = Unknown; - ai->blksize = 0; - ai->rwoffset = 0; - ai->lastchange = NULL; - ai->stack_tid = VG_INVALID_THREADID; - ai->maybe_gcc = False; -} - -static void clear_ErrContext ( ErrContext* ec ) -{ - ec->next = NULL; - ec->supp = NULL; - ec->count = 0; - ec->ekind = ValueErr; - ec->where = NULL; - ec->axskind = ReadAxs; - ec->size = 0; - ec->addr = 0; - clear_AddrInfo ( &ec->addrinfo ); - ec->syscall_param = NULL; - ec->isWriteableLack = False; - ec->m_eip = 0xDEADB00F; - ec->m_esp = 0xDEADBE0F; - ec->m_ebp = 0xDEADB0EF; - ec->tid = VG_INVALID_THREADID; -} - - -static __inline__ -Bool vg_eq_ExeContext ( Bool top_2_only, - ExeContext* e1, ExeContext* e2 ) -{ - /* Note that frames after the 4th are always ignored. 
*/ - if (top_2_only) { - return VG_(eq_ExeContext_top2(e1, e2)); - } else { - return VG_(eq_ExeContext_top4(e1, e2)); - } -} - - -static Bool eq_AddrInfo ( Bool cheap_addr_cmp, - AddrInfo* ai1, AddrInfo* ai2 ) -{ - if (ai1->akind != Undescribed - && ai2->akind != Undescribed - && ai1->akind != ai2->akind) - return False; - if (ai1->akind == Freed || ai1->akind == Mallocd) { - if (ai1->blksize != ai2->blksize) - return False; - if (!vg_eq_ExeContext(cheap_addr_cmp, - ai1->lastchange, ai2->lastchange)) - return False; - } - return True; -} - -/* Compare error contexts, to detect duplicates. Note that if they - are otherwise the same, the faulting addrs and associated rwoffsets - are allowed to be different. */ - -static Bool eq_ErrContext ( Bool cheap_addr_cmp, - ErrContext* e1, ErrContext* e2 ) -{ - if (e1->ekind != e2->ekind) - return False; - if (!vg_eq_ExeContext(cheap_addr_cmp, e1->where, e2->where)) - return False; - - switch (e1->ekind) { - case PThreadErr: - if (e1->syscall_param == e2->syscall_param) - return True; - if (0 == VG_(strcmp)(e1->syscall_param, e2->syscall_param)) - return True; - return False; - case UserErr: - case ParamErr: - if (e1->isWriteableLack != e2->isWriteableLack) return False; - if (e1->ekind == ParamErr - && 0 != VG_(strcmp)(e1->syscall_param, e2->syscall_param)) - return False; - return True; - case FreeErr: - case FreeMismatchErr: - if (e1->addr != e2->addr) return False; - if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) - return False; - return True; - case AddrErr: - if (e1->axskind != e2->axskind) return False; - if (e1->size != e2->size) return False; - if (!eq_AddrInfo(cheap_addr_cmp, &e1->addrinfo, &e2->addrinfo)) - return False; - return True; - case ValueErr: - if (e1->size != e2->size) return False; - return True; - default: - VG_(panic)("eq_ErrContext"); - } -} - -static void pp_AddrInfo ( Addr a, AddrInfo* ai ) -{ - switch (ai->akind) { - case Stack: - VG_(message)(Vg_UserMsg, - " Address 0x%x is on 
thread %d's stack", - a, ai->stack_tid); - break; - case Unknown: - if (ai->maybe_gcc) { - VG_(message)(Vg_UserMsg, - " Address 0x%x is just below %%esp. Possibly a bug in GCC/G++", - a); - VG_(message)(Vg_UserMsg, - " v 2.96 or 3.0.X. To suppress, use: --workaround-gcc296-bugs=yes"); - } else { - VG_(message)(Vg_UserMsg, - " Address 0x%x is not stack'd, malloc'd or free'd", a); - } - break; - case Freed: case Mallocd: case UserG: case UserS: { - UInt delta; - UChar* relative; - if (ai->rwoffset < 0) { - delta = (UInt)(- ai->rwoffset); - relative = "before"; - } else if (ai->rwoffset >= ai->blksize) { - delta = ai->rwoffset - ai->blksize; - relative = "after"; - } else { - delta = ai->rwoffset; - relative = "inside"; - } - if (ai->akind == UserS) { - VG_(message)(Vg_UserMsg, - " Address 0x%x is %d bytes %s a %d-byte stack red-zone created", - a, delta, relative, - ai->blksize ); - } else { - VG_(message)(Vg_UserMsg, - " Address 0x%x is %d bytes %s a block of size %d %s", - a, delta, relative, - ai->blksize, - ai->akind==Mallocd ? "alloc'd" - : ai->akind==Freed ? 
"free'd" - : "client-defined"); - } - VG_(pp_ExeContext)(ai->lastchange); - break; - } - default: - VG_(panic)("pp_AddrInfo"); - } -} - -static void pp_ErrContext ( ErrContext* ec, Bool printCount ) -{ - if (printCount) - VG_(message)(Vg_UserMsg, "Observed %d times:", ec->count ); - if (ec->tid > 1) - VG_(message)(Vg_UserMsg, "Thread %d:", ec->tid ); - switch (ec->ekind) { - case ValueErr: - if (ec->size == 0) { - VG_(message)( - Vg_UserMsg, - "Conditional jump or move depends on uninitialised value(s)"); - } else { - VG_(message)(Vg_UserMsg, - "Use of uninitialised value of size %d", - ec->size); - } - VG_(pp_ExeContext)(ec->where); - break; - case AddrErr: - switch (ec->axskind) { - case ReadAxs: - VG_(message)(Vg_UserMsg, "Invalid read of size %d", - ec->size ); - break; - case WriteAxs: - VG_(message)(Vg_UserMsg, "Invalid write of size %d", - ec->size ); - break; - case ExecAxs: - VG_(message)(Vg_UserMsg, "Jump to the invalid address " - "stated on the next line"); - break; - default: - VG_(panic)("pp_ErrContext(axskind)"); - } - VG_(pp_ExeContext)(ec->where); - pp_AddrInfo(ec->addr, &ec->addrinfo); - break; - case FreeErr: - VG_(message)(Vg_UserMsg,"Invalid free() / delete / delete[]"); - /* fall through */ - case FreeMismatchErr: - if (ec->ekind == FreeMismatchErr) - VG_(message)(Vg_UserMsg, - "Mismatched free() / delete / delete []"); - VG_(pp_ExeContext)(ec->where); - pp_AddrInfo(ec->addr, &ec->addrinfo); - break; - case ParamErr: - if (ec->isWriteableLack) { - VG_(message)(Vg_UserMsg, - "Syscall param %s contains unaddressable byte(s)", - ec->syscall_param ); - } else { - VG_(message)(Vg_UserMsg, - "Syscall param %s contains uninitialised or " - "unaddressable byte(s)", - ec->syscall_param); - } - VG_(pp_ExeContext)(ec->where); - pp_AddrInfo(ec->addr, &ec->addrinfo); - break; - case UserErr: - if (ec->isWriteableLack) { - VG_(message)(Vg_UserMsg, - "Unaddressable byte(s) found during client check request"); - } else { - VG_(message)(Vg_UserMsg, - 
"Uninitialised or " - "unaddressable byte(s) found during client check request"); - } - VG_(pp_ExeContext)(ec->where); - pp_AddrInfo(ec->addr, &ec->addrinfo); - break; - case PThreadErr: - VG_(message)(Vg_UserMsg, "%s", ec->syscall_param ); - VG_(pp_ExeContext)(ec->where); - break; - default: - VG_(panic)("pp_ErrContext"); - } -} - - -/* Figure out if we want to attach for GDB for this error, possibly - by asking the user. */ -static -Bool vg_is_GDB_attach_requested ( void ) -{ - Char ch, ch2; - Int res; - - if (VG_(clo_GDB_attach) == False) - return False; - - VG_(message)(Vg_UserMsg, ""); - - again: - VG_(printf)( - "==%d== " - "---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- ", - VG_(getpid)() - ); - - res = VG_(read)(0 /*stdin*/, &ch, 1); - if (res != 1) goto ioerror; - /* res == 1 */ - if (ch == '\n') return False; - if (ch != 'N' && ch != 'n' && ch != 'Y' && ch != 'y' - && ch != 'C' && ch != 'c') goto again; - - res = VG_(read)(0 /*stdin*/, &ch2, 1); - if (res != 1) goto ioerror; - if (ch2 != '\n') goto again; - - /* No, don't want to attach. */ - if (ch == 'n' || ch == 'N') return False; - /* Yes, want to attach. */ - if (ch == 'y' || ch == 'Y') return True; - /* No, don't want to attach, and don't ask again either. */ - vg_assert(ch == 'c' || ch == 'C'); - - ioerror: - VG_(clo_GDB_attach) = False; - return False; -} - - -/* Top-level entry point to the error management subsystem. All - detected errors are notified here; this routine decides if/when the - user should see the error. 
*/ -static void VG_(maybe_add_context) ( ErrContext* ec ) -{ - ErrContext* p; - ErrContext* p_prev; - Bool cheap_addr_cmp = False; - static Bool is_first_shown_context = True; - static Bool stopping_message = False; - static Bool slowdown_message = False; - static Int vg_n_errs_shown = 0; - - vg_assert(ec->tid >= 0 && ec->tid < VG_N_THREADS); - - /* After M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN different errors have - been found, or M_VG_COLLECT_NO_ERRORS_AFTER_FOUND total errors - have been found, just refuse to collect any more. This stops - the burden of the error-management system becoming excessive in - extremely buggy programs, although it does make it pretty - pointless to continue the Valgrind run after this point. */ - if (VG_(clo_error_limit) - && (vg_n_errs_shown >= M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN - || vg_n_errs_found >= M_VG_COLLECT_NO_ERRORS_AFTER_FOUND)) { - if (!stopping_message) { - VG_(message)(Vg_UserMsg, ""); - - if (vg_n_errs_shown >= M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN) { - VG_(message)(Vg_UserMsg, - "More than %d different errors detected. " - "I'm not reporting any more.", - M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN ); - } else { - VG_(message)(Vg_UserMsg, - "More than %d total errors detected. " - "I'm not reporting any more.", - M_VG_COLLECT_NO_ERRORS_AFTER_FOUND ); - } - - VG_(message)(Vg_UserMsg, - "Final error counts will be inaccurate. Go fix your program!"); - VG_(message)(Vg_UserMsg, - "Rerun with --error-limit=no to disable this cutoff. Note"); - VG_(message)(Vg_UserMsg, - "that your program may now segfault without prior warning from"); - VG_(message)(Vg_UserMsg, - "Valgrind, because errors are no longer being displayed."); - VG_(message)(Vg_UserMsg, ""); - stopping_message = True; - vg_ignore_errors = True; - } - return; - } - - /* After M_VG_COLLECT_ERRORS_SLOWLY_AFTER different errors have - been found, be much more conservative about collecting new - ones. 
*/ - if (vg_n_errs_shown >= M_VG_COLLECT_ERRORS_SLOWLY_AFTER) { - cheap_addr_cmp = True; - if (!slowdown_message) { - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "More than %d errors detected. Subsequent errors", - M_VG_COLLECT_ERRORS_SLOWLY_AFTER); - VG_(message)(Vg_UserMsg, - "will still be recorded, but in less detail than before."); - slowdown_message = True; - } - } - - - /* First, see if we've got an error record matching this one. */ - p = vg_err_contexts; - p_prev = NULL; - while (p != NULL) { - if (eq_ErrContext(cheap_addr_cmp, p, ec)) { - /* Found it. */ - p->count++; - if (p->supp != NULL) { - /* Deal correctly with suppressed errors. */ - p->supp->count++; - vg_n_errs_suppressed++; - } else { - vg_n_errs_found++; - } - - /* Move p to the front of the list so that future searches - for it are faster. */ - if (p_prev != NULL) { - vg_assert(p_prev->next == p); - p_prev->next = p->next; - p->next = vg_err_contexts; - vg_err_contexts = p; - } - return; - } - p_prev = p; - p = p->next; - } - - /* Didn't see it. Copy and add. */ - - /* OK, we're really going to collect it. First, describe any addr - info in the error. */ - if (ec->addrinfo.akind == Undescribed) - VG_(describe_addr) ( ec->addr, &ec->addrinfo ); - - p = VG_(malloc)(VG_AR_ERRCTXT, sizeof(ErrContext)); - *p = *ec; - p->next = vg_err_contexts; - p->supp = is_suppressible_error(ec); - vg_err_contexts = p; - if (p->supp == NULL) { - vg_n_errs_found++; - if (!is_first_shown_context) - VG_(message)(Vg_UserMsg, ""); - pp_ErrContext(p, False); - is_first_shown_context = False; - vg_n_errs_shown++; - /* Perhaps we want a GDB attach at this point? 
*/ - if (vg_is_GDB_attach_requested()) { - VG_(swizzle_esp_then_start_GDB)( - ec->m_eip, ec->m_esp, ec->m_ebp); - } - } else { - vg_n_errs_suppressed++; - p->supp->count++; - } -} - - - - -/*------------------------------------------------------------*/ -/*--- Exported fns ---*/ -/*------------------------------------------------------------*/ - -/* These two are called from generated code, so that the %EIP/%EBP - values that we need in order to create proper error messages are - picked up out of VG_(baseBlock) rather than from the thread table - (vg_threads in vg_scheduler.c). */ - -void VG_(record_value_error) ( Int size ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], - VG_(baseBlock)[VGOFF_(m_ebp)] ); - ec.ekind = ValueErr; - ec.size = size; - ec.tid = VG_(get_current_tid)(); - ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; - ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_address_error) ( Addr a, Int size, Bool isWrite ) -{ - ErrContext ec; - Bool just_below_esp; - if (vg_ignore_errors) return; - - just_below_esp - = VG_(is_just_below_ESP)( VG_(baseBlock)[VGOFF_(m_esp)], a ); - - /* If this is caused by an access immediately below %ESP, and the - user asks nicely, we just ignore it. */ - if (VG_(clo_workaround_gcc296_bugs) && just_below_esp) - return; - - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, VG_(baseBlock)[VGOFF_(m_eip)], - VG_(baseBlock)[VGOFF_(m_ebp)] ); - ec.ekind = AddrErr; - ec.axskind = isWrite ? 
WriteAxs : ReadAxs; - ec.size = size; - ec.addr = a; - ec.tid = VG_(get_current_tid)(); - ec.m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; - ec.m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - ec.m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; - ec.addrinfo.akind = Undescribed; - ec.addrinfo.maybe_gcc = just_below_esp; - VG_(maybe_add_context) ( &ec ); -} - - -/* These five are called not from generated code but in response to - requests passed back to the scheduler. So we pick up %EIP/%EBP - values from the stored thread state, not from VG_(baseBlock). */ - -void VG_(record_free_error) ( ThreadState* tst, Addr a ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp ); - ec.ekind = FreeErr; - ec.addr = a; - ec.tid = tst->tid; - ec.m_eip = tst->m_eip; - ec.m_esp = tst->m_esp; - ec.m_ebp = tst->m_ebp; - ec.addrinfo.akind = Undescribed; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_freemismatch_error) ( ThreadState* tst, Addr a ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp ); - ec.ekind = FreeMismatchErr; - ec.addr = a; - ec.tid = tst->tid; - ec.m_eip = tst->m_eip; - ec.m_esp = tst->m_esp; - ec.m_ebp = tst->m_ebp; - ec.addrinfo.akind = Undescribed; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_jump_error) ( ThreadState* tst, Addr a ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp ); - ec.ekind = AddrErr; - ec.axskind = ExecAxs; - ec.addr = a; - ec.tid = tst->tid; - ec.m_eip = tst->m_eip; - ec.m_esp = tst->m_esp; - ec.m_ebp = tst->m_ebp; - ec.addrinfo.akind = Undescribed; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_param_err) ( ThreadState* tst, Addr a, Bool isWriteLack, - Char* 
msg ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp ); - ec.ekind = ParamErr; - ec.addr = a; - ec.tid = tst->tid; - ec.m_eip = tst->m_eip; - ec.m_esp = tst->m_esp; - ec.m_ebp = tst->m_ebp; - ec.addrinfo.akind = Undescribed; - ec.syscall_param = msg; - ec.isWriteableLack = isWriteLack; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_user_err) ( ThreadState* tst, Addr a, Bool isWriteLack ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, tst->m_eip, tst->m_ebp ); - ec.ekind = UserErr; - ec.addr = a; - ec.tid = tst->tid; - ec.m_eip = tst->m_eip; - ec.m_esp = tst->m_esp; - ec.m_ebp = tst->m_ebp; - ec.addrinfo.akind = Undescribed; - ec.isWriteableLack = isWriteLack; - VG_(maybe_add_context) ( &ec ); -} - -void VG_(record_pthread_err) ( ThreadId tid, Char* msg ) -{ - ErrContext ec; - if (vg_ignore_errors) return; - if (!VG_(clo_instrument)) return; - clear_ErrContext( &ec ); - ec.count = 1; - ec.next = NULL; - ec.where = VG_(get_ExeContext)( False, VG_(threads)[tid].m_eip, - VG_(threads)[tid].m_ebp ); - ec.ekind = PThreadErr; - ec.tid = tid; - ec.syscall_param = msg; - ec.m_eip = VG_(threads)[tid].m_eip; - ec.m_esp = VG_(threads)[tid].m_esp; - ec.m_ebp = VG_(threads)[tid].m_ebp; - VG_(maybe_add_context) ( &ec ); -} - - -/*------------------------------*/ - -void VG_(show_all_errors) ( void ) -{ - Int i, n_min; - Int n_err_contexts, n_supp_contexts; - ErrContext *p, *p_min; - Suppression *su; - Bool any_supp; - - if (VG_(clo_verbosity) == 0) - return; - - n_err_contexts = 0; - for (p = vg_err_contexts; p != NULL; p = p->next) { - if (p->supp == NULL) - n_err_contexts++; - } - - n_supp_contexts = 0; - for (su = vg_suppressions; su != NULL; su = su->next) { - if (su->count > 0) - n_supp_contexts++; - } - - VG_(message)(Vg_UserMsg, - 
"ERROR SUMMARY: " - "%d errors from %d contexts (suppressed: %d from %d)", - vg_n_errs_found, n_err_contexts, - vg_n_errs_suppressed, n_supp_contexts ); - - if (VG_(clo_verbosity) <= 1) - return; - - /* Print the contexts in order of increasing error count. */ - for (i = 0; i < n_err_contexts; i++) { - n_min = (1 << 30) - 1; - p_min = NULL; - for (p = vg_err_contexts; p != NULL; p = p->next) { - if (p->supp != NULL) continue; - if (p->count < n_min) { - n_min = p->count; - p_min = p; - } - } - if (p_min == NULL) VG_(panic)("pp_AllErrContexts"); - - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, "%d errors in context %d of %d:", - p_min->count, - i+1, n_err_contexts); - pp_ErrContext( p_min, False ); - - if ((i+1 == VG_(clo_dump_error))) { - VG_(translate) ( 0 /* dummy ThreadId; irrelevant due to below NULLs */, - p_min->where->eips[0], NULL, NULL, NULL ); - } - - p_min->count = 1 << 30; - } - - if (n_supp_contexts > 0) - VG_(message)(Vg_DebugMsg, ""); - any_supp = False; - for (su = vg_suppressions; su != NULL; su = su->next) { - if (su->count > 0) { - any_supp = True; - VG_(message)(Vg_DebugMsg, "supp: %4d %s", su->count, - su->sname); - } - } - - if (n_err_contexts > 0) { - if (any_supp) - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "IN SUMMARY: " - "%d errors from %d contexts (suppressed: %d from %d)", - vg_n_errs_found, n_err_contexts, - vg_n_errs_suppressed, - n_supp_contexts ); - VG_(message)(Vg_UserMsg, ""); - } -} - -/*------------------------------------------------------------*/ -/*--- Standard suppressions ---*/ -/*------------------------------------------------------------*/ - -/* Get a non-blank, non-comment line of at most nBuf chars from fd. - Skips leading spaces on the line. Return True if EOF was hit instead. 
-*/ - -#define VG_ISSPACE(ch) (((ch)==' ') || ((ch)=='\n') || ((ch)=='\t')) - -static Bool getLine ( Int fd, Char* buf, Int nBuf ) -{ - Char ch; - Int n, i; - while (True) { - /* First, read until a non-blank char appears. */ - while (True) { - n = VG_(read)(fd, &ch, 1); - if (n == 1 && !VG_ISSPACE(ch)) break; - if (n == 0) return True; - } - - /* Now, read the line into buf. */ - i = 0; - buf[i++] = ch; buf[i] = 0; - while (True) { - n = VG_(read)(fd, &ch, 1); - if (n == 0) return False; /* the next call will return True */ - if (ch == '\n') break; - if (i > 0 && i == nBuf-1) i--; - buf[i++] = ch; buf[i] = 0; - } - while (i > 1 && VG_ISSPACE(buf[i-1])) { - i--; buf[i] = 0; - }; - - /* VG_(printf)("The line is `%s'\n", buf); */ - /* Ok, we have a line. If a non-comment line, return. - If a comment line, start all over again. */ - if (buf[0] != '#') return False; - } -} - - -/* *p_caller contains the raw name of a caller, supposedly either - fun:some_function_name or - obj:some_object_name. - Set *p_ty accordingly and advance *p_caller over the descriptor - (fun: or obj:) part. - Returns False if failed. -*/ -static Bool setLocationTy ( Char** p_caller, SuppressionLocTy* p_ty ) -{ - if (VG_(strncmp)(*p_caller, "fun:", 4) == 0) { - (*p_caller) += 4; - *p_ty = FunName; - return True; - } - if (VG_(strncmp)(*p_caller, "obj:", 4) == 0) { - (*p_caller) += 4; - *p_ty = ObjName; - return True; - } - VG_(printf)("location should start with fun: or obj:\n"); - return False; -} - - -/* Read suppressions from the file specified in vg_clo_suppressions - and place them in the suppressions list. If there's any difficulty - doing this, just give up -- there's no point in trying to recover. 
-*/ -#define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ - && VG_(strcmp)((s1),(s2))==0) - -static Char* copyStr ( Char* str ) -{ - Int n, i; - Char* str2; - n = VG_(strlen)(str); - str2 = VG_(malloc)(VG_AR_PRIVATE, n+1); - vg_assert(n > 0); - for (i = 0; i < n+1; i++) str2[i] = str[i]; - return str2; -} - -static void load_one_suppressions_file ( Char* filename ) -{ -# define N_BUF 200 - Int fd; - Bool eof; - Char buf[N_BUF+1]; - fd = VG_(open_read)( filename ); - if (fd == -1) { - VG_(message)(Vg_UserMsg, - "FATAL: can't open suppressions file `%s'", - filename ); - VG_(exit)(1); - } - - while (True) { - Suppression* supp; - supp = VG_(malloc)(VG_AR_PRIVATE, sizeof(Suppression)); - supp->count = 0; - supp->param = supp->caller0 = supp->caller1 - = supp->caller2 = supp->caller3 = NULL; - - eof = getLine ( fd, buf, N_BUF ); - if (eof) break; - - if (!STREQ(buf, "{")) goto syntax_error; - - eof = getLine ( fd, buf, N_BUF ); - if (eof || STREQ(buf, "}")) goto syntax_error; - supp->sname = copyStr(buf); - - eof = getLine ( fd, buf, N_BUF ); - if (eof) goto syntax_error; - else if (STREQ(buf, "Param")) supp->skind = Param; - else if (STREQ(buf, "Value0")) supp->skind = Value0; /* backwards compat */ - else if (STREQ(buf, "Cond")) supp->skind = Value0; - else if (STREQ(buf, "Value1")) supp->skind = Value1; - else if (STREQ(buf, "Value2")) supp->skind = Value2; - else if (STREQ(buf, "Value4")) supp->skind = Value4; - else if (STREQ(buf, "Value8")) supp->skind = Value8; - else if (STREQ(buf, "Addr1")) supp->skind = Addr1; - else if (STREQ(buf, "Addr2")) supp->skind = Addr2; - else if (STREQ(buf, "Addr4")) supp->skind = Addr4; - else if (STREQ(buf, "Addr8")) supp->skind = Addr8; - else if (STREQ(buf, "Free")) supp->skind = FreeS; - else if (STREQ(buf, "PThread")) supp->skind = PThread; - else goto syntax_error; - - if (supp->skind == Param) { - eof = getLine ( fd, buf, N_BUF ); - if (eof) goto syntax_error; - supp->param = copyStr(buf); - } - - eof = getLine ( fd, buf, N_BUF 
); - if (eof) goto syntax_error; - supp->caller0 = copyStr(buf); - if (!setLocationTy(&(supp->caller0), &(supp->caller0_ty))) - goto syntax_error; - - eof = getLine ( fd, buf, N_BUF ); - if (eof) goto syntax_error; - if (!STREQ(buf, "}")) { - supp->caller1 = copyStr(buf); - if (!setLocationTy(&(supp->caller1), &(supp->caller1_ty))) - goto syntax_error; - - eof = getLine ( fd, buf, N_BUF ); - if (eof) goto syntax_error; - if (!STREQ(buf, "}")) { - supp->caller2 = copyStr(buf); - if (!setLocationTy(&(supp->caller2), &(supp->caller2_ty))) - goto syntax_error; - - eof = getLine ( fd, buf, N_BUF ); - if (eof) goto syntax_error; - if (!STREQ(buf, "}")) { - supp->caller3 = copyStr(buf); - if (!setLocationTy(&(supp->caller3), &(supp->caller3_ty))) - goto syntax_error; - - eof = getLine ( fd, buf, N_BUF ); - if (eof || !STREQ(buf, "}")) goto syntax_error; - } - } - } - - supp->next = vg_suppressions; - vg_suppressions = supp; - } - - VG_(close)(fd); - return; - - syntax_error: - if (eof) { - VG_(message)(Vg_UserMsg, - "FATAL: in suppressions file `%s': unexpected EOF", - filename ); - } else { - VG_(message)(Vg_UserMsg, - "FATAL: in suppressions file `%s': syntax error on: %s", - filename, buf ); - } - VG_(close)(fd); - VG_(message)(Vg_UserMsg, "exiting now."); - VG_(exit)(1); - -# undef N_BUF -} - - -void VG_(load_suppressions) ( void ) -{ - Int i; - vg_suppressions = NULL; - for (i = 0; i < VG_(clo_n_suppressions); i++) { - if (VG_(clo_verbosity) > 1) { - VG_(message)(Vg_UserMsg, "Reading suppressions file: %s", - VG_(clo_suppressions)[i] ); - } - load_one_suppressions_file( VG_(clo_suppressions)[i] ); - } -} - - -/* Does an error context match a suppression? ie is this a - suppressible error? If so, return a pointer to the Suppression - record, otherwise NULL. - Tries to minimise the number of calls to what_fn_is_this since they - are expensive. 
-*/ -static Suppression* is_suppressible_error ( ErrContext* ec ) -{ -# define STREQ(s1,s2) (s1 != NULL && s2 != NULL \ - && VG_(strcmp)((s1),(s2))==0) - - Char caller0_obj[M_VG_ERRTXT]; - Char caller0_fun[M_VG_ERRTXT]; - Char caller1_obj[M_VG_ERRTXT]; - Char caller1_fun[M_VG_ERRTXT]; - Char caller2_obj[M_VG_ERRTXT]; - Char caller2_fun[M_VG_ERRTXT]; - Char caller3_obj[M_VG_ERRTXT]; - Char caller3_fun[M_VG_ERRTXT]; - - Suppression* su; - Int su_size; - - /* vg_what_fn_or_object_is_this returns: - or - or - ??? - so the strings in the suppression file should match these. - */ - - /* Initialise these strs so they are always safe to compare, even - if what_fn_or_object_is_this doesn't write anything to them. */ - caller0_obj[0] = caller1_obj[0] = caller2_obj[0] = caller3_obj[0] = 0; - caller0_fun[0] = caller1_fun[0] = caller2_obj[0] = caller3_obj[0] = 0; - - VG_(what_obj_and_fun_is_this) - ( ec->where->eips[0], caller0_obj, M_VG_ERRTXT, - caller0_fun, M_VG_ERRTXT ); - VG_(what_obj_and_fun_is_this) - ( ec->where->eips[1], caller1_obj, M_VG_ERRTXT, - caller1_fun, M_VG_ERRTXT ); - - if (VG_(clo_backtrace_size) > 2) { - VG_(what_obj_and_fun_is_this) - ( ec->where->eips[2], caller2_obj, M_VG_ERRTXT, - caller2_fun, M_VG_ERRTXT ); - - if (VG_(clo_backtrace_size) > 3) { - VG_(what_obj_and_fun_is_this) - ( ec->where->eips[3], caller3_obj, M_VG_ERRTXT, - caller3_fun, M_VG_ERRTXT ); - } - } - - /* See if the error context matches any suppression. 
*/ - for (su = vg_suppressions; su != NULL; su = su->next) { - switch (su->skind) { - case FreeS: case PThread: - case Param: case Value0: su_size = 0; break; - case Value1: case Addr1: su_size = 1; break; - case Value2: case Addr2: su_size = 2; break; - case Value4: case Addr4: su_size = 4; break; - case Value8: case Addr8: su_size = 8; break; - default: VG_(panic)("errcontext_matches_suppression"); - } - switch (su->skind) { - case Param: - if (ec->ekind != ParamErr) continue; - if (!STREQ(su->param, ec->syscall_param)) continue; - break; - case Value0: case Value1: case Value2: case Value4: case Value8: - if (ec->ekind != ValueErr) continue; - if (ec->size != su_size) continue; - break; - case Addr1: case Addr2: case Addr4: case Addr8: - if (ec->ekind != AddrErr) continue; - if (ec->size != su_size) continue; - break; - case FreeS: - if (ec->ekind != FreeErr - && ec->ekind != FreeMismatchErr) continue; - break; - case PThread: - if (ec->ekind != PThreadErr) continue; - break; - } - - switch (su->caller0_ty) { - case ObjName: if (!VG_(stringMatch)(su->caller0, - caller0_obj)) continue; - break; - case FunName: if (!VG_(stringMatch)(su->caller0, - caller0_fun)) continue; - break; - default: goto baaaad; - } - - if (su->caller1 != NULL) { - vg_assert(VG_(clo_backtrace_size) >= 2); - switch (su->caller1_ty) { - case ObjName: if (!VG_(stringMatch)(su->caller1, - caller1_obj)) continue; - break; - case FunName: if (!VG_(stringMatch)(su->caller1, - caller1_fun)) continue; - break; - default: goto baaaad; - } - } - - if (VG_(clo_backtrace_size) > 2 && su->caller2 != NULL) { - switch (su->caller2_ty) { - case ObjName: if (!VG_(stringMatch)(su->caller2, - caller2_obj)) continue; - break; - case FunName: if (!VG_(stringMatch)(su->caller2, - caller2_fun)) continue; - break; - default: goto baaaad; - } - } - - if (VG_(clo_backtrace_size) > 3 && su->caller3 != NULL) { - switch (su->caller3_ty) { - case ObjName: if (!VG_(stringMatch)(su->caller3, - caller3_obj)) continue; - 
break; - case FunName: if (!VG_(stringMatch)(su->caller3, - caller3_fun)) continue; - break; - default: goto baaaad; - } - } - - return su; - } - - return NULL; - - baaaad: - VG_(panic)("is_suppressible_error"); - -# undef STREQ -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_errcontext.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_execontext.c b/coregrind/vg_execontext.c deleted file mode 100644 index 4da1b31e18..0000000000 --- a/coregrind/vg_execontext.c +++ /dev/null @@ -1,258 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Storage, and equality on, execution contexts (backtraces). ---*/ -/*--- vg_execontext.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" - - -/*------------------------------------------------------------*/ -/*--- Low-level ExeContext storage. 
---*/ -/*------------------------------------------------------------*/ - -/* The idea is only to ever store any one context once, so as to save - space and make exact comparisons faster. */ - -static ExeContext* vg_ec_list[VG_N_EC_LISTS]; - -/* Stats only: the number of times the system was searched to locate a - context. */ -static UInt vg_ec_searchreqs; - -/* Stats only: the number of full context comparisons done. */ -static UInt vg_ec_searchcmps; - -/* Stats only: total number of stored contexts. */ -static UInt vg_ec_totstored; - -/* Number of 2, 4 and (fast) full cmps done. */ -static UInt vg_ec_cmp2s; -static UInt vg_ec_cmp4s; -static UInt vg_ec_cmpAlls; - - -/*------------------------------------------------------------*/ -/*--- Exported functions. ---*/ -/*------------------------------------------------------------*/ - - -/* Initialise this subsystem. */ -void VG_(init_ExeContext_storage) ( void ) -{ - Int i; - vg_ec_searchreqs = 0; - vg_ec_searchcmps = 0; - vg_ec_totstored = 0; - vg_ec_cmp2s = 0; - vg_ec_cmp4s = 0; - vg_ec_cmpAlls = 0; - for (i = 0; i < VG_N_EC_LISTS; i++) - vg_ec_list[i] = NULL; -} - - -/* Show stats. */ -void VG_(show_ExeContext_stats) ( void ) -{ - VG_(message)(Vg_DebugMsg, - "exectx: %d lists, %d contexts (avg %d per list)", - VG_N_EC_LISTS, vg_ec_totstored, - vg_ec_totstored / VG_N_EC_LISTS - ); - VG_(message)(Vg_DebugMsg, - "exectx: %d searches, %d full compares (%d per 1000)", - vg_ec_searchreqs, vg_ec_searchcmps, - vg_ec_searchreqs == 0 - ? 0 - : (UInt)( (((ULong)vg_ec_searchcmps) * 1000) - / ((ULong)vg_ec_searchreqs )) - ); - VG_(message)(Vg_DebugMsg, - "exectx: %d cmp2, %d cmp4, %d cmpAll", - vg_ec_cmp2s, vg_ec_cmp4s, vg_ec_cmpAlls - ); -} - - -/* Print an ExeContext. */ -void VG_(pp_ExeContext) ( ExeContext* e ) -{ - VG_(mini_stack_dump) ( e ); -} - - -/* Compare two ExeContexts, comparing all callers. */ -Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ) -{ - vg_ec_cmpAlls++; - /* Just do pointer comparison. 
*/ - if (e1 != e2) return False; - return True; -} - - -/* Compare two ExeContexts, just comparing the top two callers. */ -Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 ) -{ - vg_ec_cmp2s++; - if (e1->eips[0] != e2->eips[0] - || e1->eips[1] != e2->eips[1]) return False; - return True; -} - - -/* Compare two ExeContexts, just comparing the top four callers. */ -Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 ) -{ - vg_ec_cmp4s++; - if (e1->eips[0] != e2->eips[0] - || e1->eips[1] != e2->eips[1]) return False; - - if (VG_(clo_backtrace_size) < 3) return True; - if (e1->eips[2] != e2->eips[2]) return False; - - if (VG_(clo_backtrace_size) < 4) return True; - if (e1->eips[3] != e2->eips[3]) return False; - - return True; -} - - -/* This guy is the head honcho here. Take a snapshot of the client's - stack. Search our collection of ExeContexts to see if we already - have it, and if not, allocate a new one. Either way, return a - pointer to the context. If there is a matching context we - guarantee to not allocate a new one. Thus we never store - duplicates, and so exact equality can be quickly done as equality - on the returned ExeContext* values themselves. Inspired by Hugs's - Text type. - - In order to be thread-safe, we pass in the thread's %EIP and %EBP. -*/ -ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame, - Addr eip, Addr ebp ) -{ - Int i; - Addr eips[VG_DEEPEST_BACKTRACE]; - Bool same; - UInt hash; - ExeContext* new_ec; - ExeContext* list; - - VGP_PUSHCC(VgpExeContext); - - vg_assert(VG_(clo_backtrace_size) >= 2 - && VG_(clo_backtrace_size) <= VG_DEEPEST_BACKTRACE); - - /* First snaffle %EIPs from the client's stack into eips[0 - .. VG_(clo_backtrace_size)-1], putting zeroes in when the trail - goes cold. 
*/ - - for (i = 0; i < VG_(clo_backtrace_size); i++) - eips[i] = 0; - -# define GET_CALLER(lval) \ - if (ebp != 0 && VGM_(check_readable)(ebp, 8, NULL)) { \ - lval = ((UInt*)ebp)[1]; /* ret addr */ \ - ebp = ((UInt*)ebp)[0]; /* old ebp */ \ - } else { \ - lval = ebp = 0; \ - } - - if (skip_top_frame) { - for (i = 0; i < VG_(clo_backtrace_size); i++) - GET_CALLER(eips[i]); - } else { - eips[0] = eip; - for (i = 1; i < VG_(clo_backtrace_size); i++) - GET_CALLER(eips[i]); - } -# undef GET_CALLER - - /* Now figure out if we've seen this one before. First hash it so - as to determine the list number. */ - - hash = 0; - for (i = 0; i < VG_(clo_backtrace_size); i++) { - hash ^= (UInt)eips[i]; - hash = (hash << 29) | (hash >> 3); - } - hash = hash % VG_N_EC_LISTS; - - /* And (the expensive bit) look a matching entry in the list. */ - - vg_ec_searchreqs++; - - list = vg_ec_list[hash]; - - while (True) { - if (list == NULL) break; - vg_ec_searchcmps++; - same = True; - for (i = 0; i < VG_(clo_backtrace_size); i++) { - if (list->eips[i] != eips[i]) { - same = False; - break; - } - } - if (same) break; - list = list->next; - } - - if (list != NULL) { - /* Yay! We found it. */ - VGP_POPCC; - return list; - } - - /* Bummer. We have to allocate a new context record. 
*/ - vg_ec_totstored++; - - new_ec - = VG_(malloc)( - VG_AR_EXECTXT, - sizeof(struct _ExeContextRec *) - + VG_(clo_backtrace_size) * sizeof(Addr) - ); - - for (i = 0; i < VG_(clo_backtrace_size); i++) - new_ec->eips[i] = eips[i]; - - new_ec->next = vg_ec_list[hash]; - vg_ec_list[hash] = new_ec; - - VGP_POPCC; - return new_ec; -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_execontext.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c deleted file mode 100644 index 573ee93271..0000000000 --- a/coregrind/vg_from_ucode.c +++ /dev/null @@ -1,2647 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- The JITter: translate ucode back to x86 code. ---*/ -/*--- vg_from_ucode.c ---*/ -/*--------------------------------------------------------------------*/ -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - -#include "vg_include.h" - - -/*------------------------------------------------------------*/ -/*--- Renamings of frequently-used global functions. ---*/ -/*------------------------------------------------------------*/ - -#define dis VG_(disassemble) -#define nameIReg VG_(nameOfIntReg) -#define nameISize VG_(nameOfIntSize) - - -/*------------------------------------------------------------*/ -/*--- Instruction emission -- turning final uinstrs back ---*/ -/*--- into x86 code. ---*/ -/*------------------------------------------------------------*/ - -/* [2001-07-08 This comment is now somewhat out of date.] - - This is straightforward but for one thing: to facilitate generating - code in a single pass, we generate position-independent code. To - do this, calls and jmps to fixed addresses must specify the address - by first loading it into a register, and jump to/call that - register. Fortunately, the only jump to a literal is the jump back - to vg_dispatch, and only %eax is live then, conveniently. Ucode - call insns may only have a register as target anyway, so there's no - need to do anything fancy for them. - - The emit_* routines constitute the lowest level of instruction - emission. They simply emit the sequence of bytes corresponding to - the relevant instruction, with no further ado. In particular there - is no checking about whether uses of byte registers makes sense, - nor whether shift insns have their first operand in %cl, etc. - - These issues are taken care of by the level above, the synth_* - routines. These detect impossible operand combinations and turn - them into sequences of legal instructions. Finally, emitUInstr is - phrased in terms of the synth_* abstraction layer. 
*/ - -static UChar* emitted_code; -static Int emitted_code_used; -static Int emitted_code_size; - -static void expandEmittedCode ( void ) -{ - Int i; - UChar* tmp = VG_(jitmalloc)(2 * emitted_code_size); - /* VG_(printf)("expand to %d\n", 2 * emitted_code_size); */ - for (i = 0; i < emitted_code_size; i++) - tmp[i] = emitted_code[i]; - VG_(jitfree)(emitted_code); - emitted_code = tmp; - emitted_code_size *= 2; -} - -static __inline__ void emitB ( UInt b ) -{ - if (dis) { - if (b < 16) VG_(printf)("0%x ", b); else VG_(printf)("%2x ", b); - } - if (emitted_code_used == emitted_code_size) - expandEmittedCode(); - - emitted_code[emitted_code_used] = (UChar)b; - emitted_code_used++; -} - -static __inline__ void emitW ( UInt l ) -{ - emitB ( (l) & 0x000000FF ); - emitB ( (l >> 8) & 0x000000FF ); -} - -static __inline__ void emitL ( UInt l ) -{ - emitB ( (l) & 0x000000FF ); - emitB ( (l >> 8) & 0x000000FF ); - emitB ( (l >> 16) & 0x000000FF ); - emitB ( (l >> 24) & 0x000000FF ); -} - -static __inline__ void newEmit ( void ) -{ - if (dis) - VG_(printf)("\t %4d: ", emitted_code_used ); -} - -/* Is this a callee-save register, in the normal C calling convention? 
*/ -#define VG_CALLEE_SAVED(reg) (reg == R_EBX || reg == R_ESI || reg == R_EDI) - - -/*----------------------------------------------------*/ -/*--- Addressing modes ---*/ -/*----------------------------------------------------*/ - -static __inline__ UChar mkModRegRM ( UChar mod, UChar reg, UChar regmem ) -{ - return ((mod & 3) << 6) | ((reg & 7) << 3) | (regmem & 7); -} - -static __inline__ UChar mkSIB ( Int scale, Int regindex, Int regbase ) -{ - Int shift; - switch (scale) { - case 1: shift = 0; break; - case 2: shift = 1; break; - case 4: shift = 2; break; - case 8: shift = 3; break; - default: VG_(panic)( "mkSIB" ); - } - return ((shift & 3) << 6) | ((regindex & 7) << 3) | (regbase & 7); -} - -static __inline__ void emit_amode_litmem_reg ( Addr addr, Int reg ) -{ - /* ($ADDR), reg */ - emitB ( mkModRegRM(0, reg, 5) ); - emitL ( addr ); -} - -static __inline__ void emit_amode_regmem_reg ( Int regmem, Int reg ) -{ - /* (regmem), reg */ - if (regmem == R_ESP) - VG_(panic)("emit_amode_regmem_reg"); - if (regmem == R_EBP) { - emitB ( mkModRegRM(1, reg, 5) ); - emitB ( 0x00 ); - } else { - emitB( mkModRegRM(0, reg, regmem) ); - } -} - -static __inline__ void emit_amode_offregmem_reg ( Int off, Int regmem, Int reg ) -{ - if (regmem == R_ESP) - VG_(panic)("emit_amode_offregmem_reg(ESP)"); - if (off < -128 || off > 127) { - /* Use a large offset */ - /* d32(regmem), reg */ - emitB ( mkModRegRM(2, reg, regmem) ); - emitL ( off ); - } else { - /* d8(regmem), reg */ - emitB ( mkModRegRM(1, reg, regmem) ); - emitB ( off & 0xFF ); - } -} - -static __inline__ void emit_amode_sib_reg ( Int off, Int scale, Int regbase, - Int regindex, Int reg ) -{ - if (regindex == R_ESP) - VG_(panic)("emit_amode_sib_reg(ESP)"); - if (off < -128 || off > 127) { - /* Use a 32-bit offset */ - emitB ( mkModRegRM(2, reg, 4) ); /* SIB with 32-bit displacement */ - emitB ( mkSIB( scale, regindex, regbase ) ); - emitL ( off ); - } else { - /* Use an 8-bit offset */ - emitB ( mkModRegRM(1, reg, 4) ); 
/* SIB with 8-bit displacement */ - emitB ( mkSIB( scale, regindex, regbase ) ); - emitB ( off & 0xFF ); - } -} - -static __inline__ void emit_amode_ereg_greg ( Int e_reg, Int g_reg ) -{ - /* other_reg, reg */ - emitB ( mkModRegRM(3, g_reg, e_reg) ); -} - -static __inline__ void emit_amode_greg_ereg ( Int g_reg, Int e_reg ) -{ - /* other_reg, reg */ - emitB ( mkModRegRM(3, g_reg, e_reg) ); -} - - -/*----------------------------------------------------*/ -/*--- Opcode translation ---*/ -/*----------------------------------------------------*/ - -static __inline__ Int mkGrp1opcode ( Opcode opc ) -{ - switch (opc) { - case ADD: return 0; - case OR: return 1; - case ADC: return 2; - case SBB: return 3; - case AND: return 4; - case SUB: return 5; - case XOR: return 6; - default: VG_(panic)("mkGrp1opcode"); - } -} - -static __inline__ Int mkGrp2opcode ( Opcode opc ) -{ - switch (opc) { - case ROL: return 0; - case ROR: return 1; - case RCL: return 2; - case RCR: return 3; - case SHL: return 4; - case SHR: return 5; - case SAR: return 7; - default: VG_(panic)("mkGrp2opcode"); - } -} - -static __inline__ Int mkGrp3opcode ( Opcode opc ) -{ - switch (opc) { - case NOT: return 2; - case NEG: return 3; - default: VG_(panic)("mkGrp3opcode"); - } -} - -static __inline__ Int mkGrp4opcode ( Opcode opc ) -{ - switch (opc) { - case INC: return 0; - case DEC: return 1; - default: VG_(panic)("mkGrp4opcode"); - } -} - -static __inline__ Int mkGrp5opcode ( Opcode opc ) -{ - switch (opc) { - case CALLM: return 2; - case JMP: return 4; - default: VG_(panic)("mkGrp5opcode"); - } -} - -static __inline__ UChar mkPrimaryOpcode ( Opcode opc ) -{ - switch (opc) { - case ADD: return 0x00; - case ADC: return 0x10; - case AND: return 0x20; - case XOR: return 0x30; - case OR: return 0x08; - case SBB: return 0x18; - case SUB: return 0x28; - default: VG_(panic)("mkPrimaryOpcode"); - } -} - -/*----------------------------------------------------*/ -/*--- v-size (4, or 2 with OSO) insn emitters ---*/ 
-/*----------------------------------------------------*/ - -static void emit_movv_offregmem_reg ( Int sz, Int off, Int areg, Int reg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0x8B ); /* MOV Ev, Gv */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t0x%x(%s), %s\n", - nameISize(sz), off, nameIReg(4,areg), nameIReg(sz,reg)); -} - -static void emit_movv_reg_offregmem ( Int sz, Int reg, Int off, Int areg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0x89 ); /* MOV Gv, Ev */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t%s, 0x%x(%s)\n", - nameISize(sz), nameIReg(sz,reg), off, nameIReg(4,areg)); -} - -static void emit_movv_regmem_reg ( Int sz, Int reg1, Int reg2 ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0x8B ); /* MOV Ev, Gv */ - emit_amode_regmem_reg ( reg1, reg2 ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t(%s), %s\n", - nameISize(sz), nameIReg(4,reg1), nameIReg(sz,reg2)); -} - -static void emit_movv_reg_regmem ( Int sz, Int reg1, Int reg2 ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0x89 ); /* MOV Gv, Ev */ - emit_amode_regmem_reg ( reg2, reg1 ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t%s, (%s)\n", - nameISize(sz), nameIReg(sz,reg1), nameIReg(4,reg2)); -} - -static void emit_movv_reg_reg ( Int sz, Int reg1, Int reg2 ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0x89 ); /* MOV Gv, Ev */ - emit_amode_ereg_greg ( reg2, reg1 ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t%s, %s\n", - nameISize(sz), nameIReg(sz,reg1), nameIReg(sz,reg2)); -} - -static void emit_nonshiftopv_lit_reg ( Int sz, Opcode opc, - UInt lit, Int reg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - if (lit == VG_(extend_s_8to32)(lit & 0x000000FF)) { - /* short form OK */ - emitB ( 0x83 ); /* Grp1 Ib,Ev */ - emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) ); - emitB ( lit & 0x000000FF ); - } else { - emitB ( 0x81 ); /* Grp1 Iv,Ev */ - emit_amode_ereg_greg ( 
reg, mkGrp1opcode(opc) ); - if (sz == 2) emitW ( lit ); else emitL ( lit ); - } - if (dis) - VG_(printf)( "\n\t\t%s%c\t$0x%x, %s\n", - VG_(nameUOpcode)(False,opc), nameISize(sz), - lit, nameIReg(sz,reg)); -} - -static void emit_shiftopv_lit_reg ( Int sz, Opcode opc, UInt lit, Int reg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0xC1 ); /* Grp2 Ib,Ev */ - emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) ); - emitB ( lit ); - if (dis) - VG_(printf)( "\n\t\t%s%c\t$%d, %s\n", - VG_(nameUOpcode)(False,opc), nameISize(sz), - lit, nameIReg(sz,reg)); -} - -static void emit_shiftopv_cl_stack0 ( Int sz, Opcode opc ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0xD3 ); /* Grp2 CL,Ev */ - emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) ); - emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */ - emitB ( 0x00 ); /* the d8 displacement */ - if (dis) - VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n", - VG_(nameUOpcode)(False,opc), nameISize(sz) ); -} - -static void emit_shiftopb_cl_stack0 ( Opcode opc ) -{ - newEmit(); - emitB ( 0xD2 ); /* Grp2 CL,Eb */ - emitB ( mkModRegRM ( 1, mkGrp2opcode(opc), 4 ) ); - emitB ( 0x24 ); /* a SIB, I think `d8(%esp)' */ - emitB ( 0x00 ); /* the d8 displacement */ - if (dis) - VG_(printf)("\n\t\t%s%c %%cl, 0(%%esp)\n", - VG_(nameUOpcode)(False,opc), nameISize(1) ); -} - -static void emit_nonshiftopv_offregmem_reg ( Int sz, Opcode opc, - Int off, Int areg, Int reg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\t%s%c\t0x%x(%s), %s\n", - VG_(nameUOpcode)(False,opc), nameISize(sz), - off, nameIReg(4,areg), nameIReg(sz,reg)); -} - -static void emit_nonshiftopv_reg_reg ( Int sz, Opcode opc, - Int reg1, Int reg2 ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); -# if 0 - /* Perfectly correct, but the GNU assembler uses the other form. - Therefore we too use the other form, to aid verification. 
*/ - emitB ( 3 + mkPrimaryOpcode(opc) ); /* op Ev, Gv */ - emit_amode_ereg_greg ( reg1, reg2 ); -# else - emitB ( 1 + mkPrimaryOpcode(opc) ); /* op Gv, Ev */ - emit_amode_greg_ereg ( reg1, reg2 ); -# endif - if (dis) - VG_(printf)( "\n\t\t%s%c\t%s, %s\n", - VG_(nameUOpcode)(False,opc), nameISize(sz), - nameIReg(sz,reg1), nameIReg(sz,reg2)); -} - -static void emit_movv_lit_reg ( Int sz, UInt lit, Int reg ) -{ - if (lit == 0) { - emit_nonshiftopv_reg_reg ( sz, XOR, reg, reg ); - return; - } - newEmit(); - if (sz == 2) emitB ( 0x66 ); - emitB ( 0xB8+reg ); /* MOV imm, Gv */ - if (sz == 2) emitW ( lit ); else emitL ( lit ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t$0x%x, %s\n", - nameISize(sz), lit, nameIReg(sz,reg)); -} - -static void emit_unaryopv_reg ( Int sz, Opcode opc, Int reg ) -{ - newEmit(); - if (sz == 2) emitB ( 0x66 ); - switch (opc) { - case NEG: - emitB ( 0xF7 ); - emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) ); - if (dis) - VG_(printf)( "\n\t\tneg%c\t%s\n", - nameISize(sz), nameIReg(sz,reg)); - break; - case NOT: - emitB ( 0xF7 ); - emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) ); - if (dis) - VG_(printf)( "\n\t\tnot%c\t%s\n", - nameISize(sz), nameIReg(sz,reg)); - break; - case DEC: - emitB ( 0x48 + reg ); - if (dis) - VG_(printf)( "\n\t\tdec%c\t%s\n", - nameISize(sz), nameIReg(sz,reg)); - break; - case INC: - emitB ( 0x40 + reg ); - if (dis) - VG_(printf)( "\n\t\tinc%c\t%s\n", - nameISize(sz), nameIReg(sz,reg)); - break; - default: - VG_(panic)("emit_unaryopv_reg"); - } -} - -static void emit_pushv_reg ( Int sz, Int reg ) -{ - newEmit(); - if (sz == 2) { - emitB ( 0x66 ); - } else { - vg_assert(sz == 4); - } - emitB ( 0x50 + reg ); - if (dis) - VG_(printf)("\n\t\tpush%c %s\n", nameISize(sz), nameIReg(sz,reg)); -} - -static void emit_popv_reg ( Int sz, Int reg ) -{ - newEmit(); - if (sz == 2) { - emitB ( 0x66 ); - } else { - vg_assert(sz == 4); - } - emitB ( 0x58 + reg ); - if (dis) - VG_(printf)("\n\t\tpop%c %s\n", nameISize(sz), nameIReg(sz,reg)); -} - 
-static void emit_pushl_lit8 ( Int lit8 ) -{ - vg_assert(lit8 >= -128 && lit8 < 128); - newEmit(); - emitB ( 0x6A ); - emitB ( (UChar)((UInt)lit8) ); - if (dis) - VG_(printf)("\n\t\tpushl $%d\n", lit8 ); -} - -static void emit_pushl_lit32 ( UInt int32 ) -{ - newEmit(); - emitB ( 0x68 ); - emitL ( int32 ); - if (dis) - VG_(printf)("\n\t\tpushl $0x%x\n", int32 ); -} - -static void emit_cmpl_zero_reg ( Int reg ) -{ - newEmit(); - emitB ( 0x83 ); - emit_amode_ereg_greg ( reg, 7 /* Grp 3 opcode for CMP */ ); - emitB ( 0x00 ); - if (dis) - VG_(printf)("\n\t\tcmpl $0, %s\n", nameIReg(4,reg)); -} - -static void emit_swapl_reg_ECX ( Int reg ) -{ - newEmit(); - emitB ( 0x87 ); /* XCHG Gv,Ev */ - emit_amode_ereg_greg ( reg, R_ECX ); - if (dis) - VG_(printf)("\n\t\txchgl %%ecx, %s\n", nameIReg(4,reg)); -} - -static void emit_swapl_reg_EAX ( Int reg ) -{ - newEmit(); - emitB ( 0x90 + reg ); /* XCHG Gv,eAX */ - if (dis) - VG_(printf)("\n\t\txchgl %%eax, %s\n", nameIReg(4,reg)); -} - -static void emit_swapl_reg_reg ( Int reg1, Int reg2 ) -{ - newEmit(); - emitB ( 0x87 ); /* XCHG Gv,Ev */ - emit_amode_ereg_greg ( reg1, reg2 ); - if (dis) - VG_(printf)("\n\t\txchgl %s, %s\n", nameIReg(4,reg1), - nameIReg(4,reg2)); -} - -static void emit_bswapl_reg ( Int reg ) -{ - newEmit(); - emitB ( 0x0F ); - emitB ( 0xC8 + reg ); /* BSWAP r32 */ - if (dis) - VG_(printf)("\n\t\tbswapl %s\n", nameIReg(4,reg)); -} - -static void emit_movl_reg_reg ( Int regs, Int regd ) -{ - newEmit(); - emitB ( 0x89 ); /* MOV Gv,Ev */ - emit_amode_ereg_greg ( regd, regs ); - if (dis) - VG_(printf)("\n\t\tmovl %s, %s\n", nameIReg(4,regs), nameIReg(4,regd)); -} - -static void emit_testv_lit_reg ( Int sz, UInt lit, Int reg ) -{ - newEmit(); - if (sz == 2) { - emitB ( 0x66 ); - } else { - vg_assert(sz == 4); - } - emitB ( 0xF7 ); /* Grp3 Ev */ - emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); - if (sz == 2) emitW ( lit ); else emitL ( lit ); - if (dis) - VG_(printf)("\n\t\ttest%c $0x%x, %s\n", 
nameISize(sz), - lit, nameIReg(sz,reg)); -} - -static void emit_testv_lit_offregmem ( Int sz, UInt lit, Int off, Int reg ) -{ - newEmit(); - if (sz == 2) { - emitB ( 0x66 ); - } else { - vg_assert(sz == 4); - } - emitB ( 0xF7 ); /* Grp3 Ev */ - emit_amode_offregmem_reg ( off, reg, 0 /* Grp3 subopcode for TEST */ ); - if (sz == 2) emitW ( lit ); else emitL ( lit ); - if (dis) - VG_(printf)("\n\t\ttest%c $%d, 0x%x(%s)\n", - nameISize(sz), lit, off, nameIReg(4,reg) ); -} - -static void emit_movv_lit_offregmem ( Int sz, UInt lit, Int off, Int memreg ) -{ - newEmit(); - if (sz == 2) { - emitB ( 0x66 ); - } else { - vg_assert(sz == 4); - } - emitB ( 0xC7 ); /* Grp11 Ev */ - emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); - if (sz == 2) emitW ( lit ); else emitL ( lit ); - if (dis) - VG_(printf)( "\n\t\tmov%c\t$0x%x, 0x%x(%s)\n", - nameISize(sz), lit, off, nameIReg(4,memreg) ); -} - - -/*----------------------------------------------------*/ -/*--- b-size (1 byte) instruction emitters ---*/ -/*----------------------------------------------------*/ - -/* There is some doubt as to whether C6 (Grp 11) is in the - 486 insn set. ToDo: investigate. */ -static void emit_movb_lit_offregmem ( UInt lit, Int off, Int memreg ) -{ - newEmit(); - emitB ( 0xC6 ); /* Grp11 Eb */ - emit_amode_offregmem_reg ( off, memreg, 0 /* Grp11 subopcode for MOV */ ); - emitB ( lit ); - if (dis) - VG_(printf)( "\n\t\tmovb\t$0x%x, 0x%x(%s)\n", - lit, off, nameIReg(4,memreg) ); -} - -static void emit_nonshiftopb_offregmem_reg ( Opcode opc, - Int off, Int areg, Int reg ) -{ - newEmit(); - emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\t%sb\t0x%x(%s), %s\n", - VG_(nameUOpcode)(False,opc), off, nameIReg(4,areg), - nameIReg(1,reg)); -} - -static void emit_movb_reg_offregmem ( Int reg, Int off, Int areg ) -{ - /* Could do better when reg == %al. 
*/ - newEmit(); - emitB ( 0x88 ); /* MOV G1, E1 */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\tmovb\t%s, 0x%x(%s)\n", - nameIReg(1,reg), off, nameIReg(4,areg)); -} - -static void emit_nonshiftopb_reg_reg ( Opcode opc, Int reg1, Int reg2 ) -{ - newEmit(); - emitB ( 2 + mkPrimaryOpcode(opc) ); /* op Eb, Gb */ - emit_amode_ereg_greg ( reg1, reg2 ); - if (dis) - VG_(printf)( "\n\t\t%sb\t%s, %s\n", - VG_(nameUOpcode)(False,opc), - nameIReg(1,reg1), nameIReg(1,reg2)); -} - -static void emit_movb_reg_regmem ( Int reg1, Int reg2 ) -{ - newEmit(); - emitB ( 0x88 ); /* MOV G1, E1 */ - emit_amode_regmem_reg ( reg2, reg1 ); - if (dis) - VG_(printf)( "\n\t\tmovb\t%s, (%s)\n", nameIReg(1,reg1), - nameIReg(4,reg2)); -} - -static void emit_nonshiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) -{ - newEmit(); - emitB ( 0x80 ); /* Grp1 Ib,Eb */ - emit_amode_ereg_greg ( reg, mkGrp1opcode(opc) ); - emitB ( lit & 0x000000FF ); - if (dis) - VG_(printf)( "\n\t\t%sb\t$0x%x, %s\n", VG_(nameUOpcode)(False,opc), - lit, nameIReg(1,reg)); -} - -static void emit_shiftopb_lit_reg ( Opcode opc, UInt lit, Int reg ) -{ - newEmit(); - emitB ( 0xC0 ); /* Grp2 Ib,Eb */ - emit_amode_ereg_greg ( reg, mkGrp2opcode(opc) ); - emitB ( lit ); - if (dis) - VG_(printf)( "\n\t\t%sb\t$%d, %s\n", - VG_(nameUOpcode)(False,opc), - lit, nameIReg(1,reg)); -} - -static void emit_unaryopb_reg ( Opcode opc, Int reg ) -{ - newEmit(); - switch (opc) { - case INC: - emitB ( 0xFE ); - emit_amode_ereg_greg ( reg, mkGrp4opcode(INC) ); - if (dis) - VG_(printf)( "\n\t\tincb\t%s\n", nameIReg(1,reg)); - break; - case DEC: - emitB ( 0xFE ); - emit_amode_ereg_greg ( reg, mkGrp4opcode(DEC) ); - if (dis) - VG_(printf)( "\n\t\tdecb\t%s\n", nameIReg(1,reg)); - break; - case NOT: - emitB ( 0xF6 ); - emit_amode_ereg_greg ( reg, mkGrp3opcode(NOT) ); - if (dis) - VG_(printf)( "\n\t\tnotb\t%s\n", nameIReg(1,reg)); - break; - case NEG: - emitB ( 0xF6 ); - emit_amode_ereg_greg ( reg, mkGrp3opcode(NEG) ); - 
if (dis) - VG_(printf)( "\n\t\tnegb\t%s\n", nameIReg(1,reg)); - break; - default: - VG_(panic)("emit_unaryopb_reg"); - } -} - -static void emit_testb_lit_reg ( UInt lit, Int reg ) -{ - newEmit(); - emitB ( 0xF6 ); /* Grp3 Eb */ - emit_amode_ereg_greg ( reg, 0 /* Grp3 subopcode for TEST */ ); - emitB ( lit ); - if (dis) - VG_(printf)("\n\t\ttestb $0x%x, %s\n", lit, nameIReg(1,reg)); -} - - -/*----------------------------------------------------*/ -/*--- zero-extended load emitters ---*/ -/*----------------------------------------------------*/ - -static void emit_movzbl_offregmem_reg ( Int off, Int regmem, Int reg ) -{ - newEmit(); - emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ - emit_amode_offregmem_reg ( off, regmem, reg ); - if (dis) - VG_(printf)( "\n\t\tmovzbl\t0x%x(%s), %s\n", - off, nameIReg(4,regmem), nameIReg(4,reg)); -} - -static void emit_movzbl_regmem_reg ( Int reg1, Int reg2 ) -{ - newEmit(); - emitB ( 0x0F ); emitB ( 0xB6 ); /* MOVZBL */ - emit_amode_regmem_reg ( reg1, reg2 ); - if (dis) - VG_(printf)( "\n\t\tmovzbl\t(%s), %s\n", nameIReg(4,reg1), - nameIReg(4,reg2)); -} - -static void emit_movzwl_offregmem_reg ( Int off, Int areg, Int reg ) -{ - newEmit(); - emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ - emit_amode_offregmem_reg ( off, areg, reg ); - if (dis) - VG_(printf)( "\n\t\tmovzwl\t0x%x(%s), %s\n", - off, nameIReg(4,areg), nameIReg(4,reg)); -} - -static void emit_movzwl_regmem_reg ( Int reg1, Int reg2 ) -{ - newEmit(); - emitB ( 0x0F ); emitB ( 0xB7 ); /* MOVZWL */ - emit_amode_regmem_reg ( reg1, reg2 ); - if (dis) - VG_(printf)( "\n\t\tmovzwl\t(%s), %s\n", nameIReg(4,reg1), - nameIReg(4,reg2)); -} - -/*----------------------------------------------------*/ -/*--- FPU instruction emitters ---*/ -/*----------------------------------------------------*/ - -static void emit_get_fpu_state ( void ) -{ - Int off = 4 * VGOFF_(m_fpustate); - newEmit(); - emitB ( 0xDD ); emitB ( 0xA5 ); /* frstor d32(%ebp) */ - emitL ( off ); - if (dis) - 
VG_(printf)("\n\t\tfrstor\t%d(%%ebp)\n", off ); -} - -static void emit_put_fpu_state ( void ) -{ - Int off = 4 * VGOFF_(m_fpustate); - newEmit(); - emitB ( 0xDD ); emitB ( 0xB5 ); /* fnsave d32(%ebp) */ - emitL ( off ); - if (dis) - VG_(printf)("\n\t\tfnsave\t%d(%%ebp)\n", off ); -} - -static void emit_fpu_no_mem ( UChar first_byte, - UChar second_byte ) -{ - newEmit(); - emitB ( first_byte ); - emitB ( second_byte ); - if (dis) - VG_(printf)("\n\t\tfpu-0x%x:0x%x\n", - (UInt)first_byte, (UInt)second_byte ); -} - -static void emit_fpu_regmem ( UChar first_byte, - UChar second_byte_masked, - Int reg ) -{ - newEmit(); - emitB ( first_byte ); - emit_amode_regmem_reg ( reg, second_byte_masked >> 3 ); - if (dis) - VG_(printf)("\n\t\tfpu-0x%x:0x%x-(%s)\n", - (UInt)first_byte, (UInt)second_byte_masked, - nameIReg(4,reg) ); -} - - -/*----------------------------------------------------*/ -/*--- misc instruction emitters ---*/ -/*----------------------------------------------------*/ - -static void emit_call_reg ( Int reg ) -{ - newEmit(); - emitB ( 0xFF ); /* Grp5 */ - emit_amode_ereg_greg ( reg, mkGrp5opcode(CALLM) ); - if (dis) - VG_(printf)( "\n\t\tcall\t*%s\n", nameIReg(4,reg) ); -} - - -static void emit_call_star_EBP_off ( Int byte_off ) -{ - newEmit(); - if (byte_off < -128 || byte_off > 127) { - emitB ( 0xFF ); - emitB ( 0x95 ); - emitL ( byte_off ); - } else { - emitB ( 0xFF ); - emitB ( 0x55 ); - emitB ( byte_off ); - } - if (dis) - VG_(printf)( "\n\t\tcall * %d(%%ebp)\n", byte_off ); -} - - -static void emit_addlit8_offregmem ( Int lit8, Int regmem, Int off ) -{ - vg_assert(lit8 >= -128 && lit8 < 128); - newEmit(); - emitB ( 0x83 ); /* Grp1 Ib,Ev */ - emit_amode_offregmem_reg ( off, regmem, - 0 /* Grp1 subopcode for ADD */ ); - emitB ( lit8 & 0xFF ); - if (dis) - VG_(printf)( "\n\t\taddl $%d, %d(%s)\n", lit8, off, - nameIReg(4,regmem)); -} - - -static void emit_add_lit_to_esp ( Int lit ) -{ - if (lit < -128 || lit > 127) VG_(panic)("emit_add_lit_to_esp"); - 
newEmit(); - emitB ( 0x83 ); - emitB ( 0xC4 ); - emitB ( lit & 0xFF ); - if (dis) - VG_(printf)( "\n\t\taddl $%d, %%esp\n", lit ); -} - - -static void emit_movb_AL_zeroESPmem ( void ) -{ - /* movb %al, 0(%esp) */ - /* 88442400 movb %al, 0(%esp) */ - newEmit(); - emitB ( 0x88 ); - emitB ( 0x44 ); - emitB ( 0x24 ); - emitB ( 0x00 ); - if (dis) - VG_(printf)( "\n\t\tmovb %%al, 0(%%esp)\n" ); -} - -static void emit_movb_zeroESPmem_AL ( void ) -{ - /* movb 0(%esp), %al */ - /* 8A442400 movb 0(%esp), %al */ - newEmit(); - emitB ( 0x8A ); - emitB ( 0x44 ); - emitB ( 0x24 ); - emitB ( 0x00 ); - if (dis) - VG_(printf)( "\n\t\tmovb 0(%%esp), %%al\n" ); -} - - -/* Emit a jump short with an 8-bit signed offset. Note that the - offset is that which should be added to %eip once %eip has been - advanced over this insn. */ -static void emit_jcondshort_delta ( Condcode cond, Int delta ) -{ - vg_assert(delta >= -128 && delta <= 127); - newEmit(); - emitB ( 0x70 + (UInt)cond ); - emitB ( (UChar)delta ); - if (dis) - VG_(printf)( "\n\t\tj%s-8\t%%eip+%d\n", - VG_(nameCondcode)(cond), delta ); -} - -static void emit_get_eflags ( void ) -{ - Int off = 4 * VGOFF_(m_eflags); - vg_assert(off >= 0 && off < 128); - newEmit(); - emitB ( 0xFF ); /* PUSHL off(%ebp) */ - emitB ( 0x75 ); - emitB ( off ); - emitB ( 0x9D ); /* POPFL */ - if (dis) - VG_(printf)( "\n\t\tpushl %d(%%ebp) ; popfl\n", off ); -} - -static void emit_put_eflags ( void ) -{ - Int off = 4 * VGOFF_(m_eflags); - vg_assert(off >= 0 && off < 128); - newEmit(); - emitB ( 0x9C ); /* PUSHFL */ - emitB ( 0x8F ); /* POPL vg_m_state.m_eflags */ - emitB ( 0x45 ); - emitB ( off ); - if (dis) - VG_(printf)( "\n\t\tpushfl ; popl %d(%%ebp)\n", off ); -} - -static void emit_setb_reg ( Int reg, Condcode cond ) -{ - newEmit(); - emitB ( 0x0F ); emitB ( 0x90 + (UChar)cond ); - emit_amode_ereg_greg ( reg, 0 ); - if (dis) - VG_(printf)("\n\t\tset%s %s\n", - VG_(nameCondcode)(cond), nameIReg(1,reg)); -} - -static void emit_ret ( void ) -{ - 
newEmit(); - emitB ( 0xC3 ); /* RET */ - if (dis) - VG_(printf)("\n\t\tret\n"); -} - -static void emit_pushal ( void ) -{ - newEmit(); - emitB ( 0x60 ); /* PUSHAL */ - if (dis) - VG_(printf)("\n\t\tpushal\n"); -} - -static void emit_popal ( void ) -{ - newEmit(); - emitB ( 0x61 ); /* POPAL */ - if (dis) - VG_(printf)("\n\t\tpopal\n"); -} - -static void emit_lea_litreg_reg ( UInt lit, Int regmem, Int reg ) -{ - newEmit(); - emitB ( 0x8D ); /* LEA M,Gv */ - emit_amode_offregmem_reg ( (Int)lit, regmem, reg ); - if (dis) - VG_(printf)("\n\t\tleal 0x%x(%s), %s\n", - lit, nameIReg(4,regmem), nameIReg(4,reg) ); -} - -static void emit_lea_sib_reg ( UInt lit, Int scale, - Int regbase, Int regindex, Int reg ) -{ - newEmit(); - emitB ( 0x8D ); /* LEA M,Gv */ - emit_amode_sib_reg ( (Int)lit, scale, regbase, regindex, reg ); - if (dis) - VG_(printf)("\n\t\tleal 0x%x(%s,%s,%d), %s\n", - lit, nameIReg(4,regbase), - nameIReg(4,regindex), scale, - nameIReg(4,reg) ); -} - -static void emit_AMD_prefetch_reg ( Int reg ) -{ - newEmit(); - emitB ( 0x0F ); - emitB ( 0x0D ); - emit_amode_regmem_reg ( reg, 1 /* 0 is prefetch; 1 is prefetchw */ ); - if (dis) - VG_(printf)("\n\t\tamd-prefetch (%s)\n", nameIReg(4,reg) ); -} - -/*----------------------------------------------------*/ -/*--- Instruction synthesisers ---*/ -/*----------------------------------------------------*/ - -static Condcode invertCondition ( Condcode cond ) -{ - return (Condcode)(1 ^ (UInt)cond); -} - - -/* Synthesise a call to *baseBlock[offset], ie, - call * (4 x offset)(%ebp). 
-*/ -static void synth_call_baseBlock_method ( Bool ensure_shortform, - Int word_offset ) -{ - vg_assert(word_offset >= 0); - vg_assert(word_offset < VG_BASEBLOCK_WORDS); - if (ensure_shortform) - vg_assert(word_offset < 32); - emit_call_star_EBP_off ( 4 * word_offset ); -} - - -static void load_ebp_from_JmpKind ( JmpKind jmpkind ) -{ - switch (jmpkind) { - case JmpBoring: - break; - case JmpCall: - case JmpRet: - emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_STKADJ, R_EBP ); - break; - case JmpSyscall: - emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_SYSCALL, R_EBP ); - break; - case JmpClientReq: - emit_movv_lit_reg ( 4, VG_TRC_EBP_JMP_CLIENTREQ, R_EBP ); - break; - default: - VG_(panic)("load_ebp_from_JmpKind"); - } -} - -/* Jump to the next translation, by loading its original addr into - %eax and returning to the scheduler. Signal special requirements - by loading a special value into %ebp first. -*/ -static void synth_jmp_reg ( Int reg, JmpKind jmpkind ) -{ - load_ebp_from_JmpKind ( jmpkind ); - if (reg != R_EAX) - emit_movv_reg_reg ( 4, reg, R_EAX ); - emit_ret(); -} - - -/* Same deal as synth_jmp_reg. 
*/ -static void synth_jmp_lit ( Addr addr, JmpKind jmpkind ) -{ - load_ebp_from_JmpKind ( jmpkind ); - emit_movv_lit_reg ( 4, addr, R_EAX ); - emit_ret(); -} - - -static void synth_jcond_lit ( Condcode cond, Addr addr ) -{ - /* Do the following: - get eflags - jmp short if not cond to xyxyxy - addr -> eax - ret - xyxyxy - - 2 0000 750C jnz xyxyxy - 3 0002 B877665544 movl $0x44556677, %eax - 4 0007 C3 ret - 5 0008 FFE3 jmp *%ebx - 6 xyxyxy: - */ - emit_get_eflags(); - emit_jcondshort_delta ( invertCondition(cond), 5+1 ); - synth_jmp_lit ( addr, JmpBoring ); -} - - -static void synth_jmp_ifzero_reg_lit ( Int reg, Addr addr ) -{ - /* 0000 83FF00 cmpl $0, %edi - 0003 750A jnz next - 0005 B844332211 movl $0x11223344, %eax - 000a C3 ret - next: - */ - emit_cmpl_zero_reg ( reg ); - emit_jcondshort_delta ( CondNZ, 5+1 ); - synth_jmp_lit ( addr, JmpBoring ); -} - - -static void synth_mov_lit_reg ( Int size, UInt lit, Int reg ) -{ - /* Load the zero-extended literal into reg, at size l, - regardless of the request size. 
*/ - emit_movv_lit_reg ( 4, lit, reg ); -} - - -static void synth_mov_regmem_reg ( Int size, Int reg1, Int reg2 ) -{ - switch (size) { - case 4: emit_movv_regmem_reg ( 4, reg1, reg2 ); break; - case 2: emit_movzwl_regmem_reg ( reg1, reg2 ); break; - case 1: emit_movzbl_regmem_reg ( reg1, reg2 ); break; - default: VG_(panic)("synth_mov_regmem_reg"); - } -} - - -static void synth_mov_offregmem_reg ( Int size, Int off, Int areg, Int reg ) -{ - switch (size) { - case 4: emit_movv_offregmem_reg ( 4, off, areg, reg ); break; - case 2: emit_movzwl_offregmem_reg ( off, areg, reg ); break; - case 1: emit_movzbl_offregmem_reg ( off, areg, reg ); break; - default: VG_(panic)("synth_mov_offregmem_reg"); - } -} - - -static void synth_mov_reg_offregmem ( Int size, Int reg, - Int off, Int areg ) -{ - switch (size) { - case 4: emit_movv_reg_offregmem ( 4, reg, off, areg ); break; - case 2: emit_movv_reg_offregmem ( 2, reg, off, areg ); break; - case 1: if (reg < 4) { - emit_movb_reg_offregmem ( reg, off, areg ); - } - else { - emit_swapl_reg_EAX ( reg ); - emit_movb_reg_offregmem ( R_AL, off, areg ); - emit_swapl_reg_EAX ( reg ); - } - break; - default: VG_(panic)("synth_mov_reg_offregmem"); - } -} - - -static void synth_mov_reg_memreg ( Int size, Int reg1, Int reg2 ) -{ - Int s1; - switch (size) { - case 4: emit_movv_reg_regmem ( 4, reg1, reg2 ); break; - case 2: emit_movv_reg_regmem ( 2, reg1, reg2 ); break; - case 1: if (reg1 < 4) { - emit_movb_reg_regmem ( reg1, reg2 ); - } - else { - /* Choose a swap reg which is < 4 and not reg1 or reg2. */ - for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; - emit_swapl_reg_reg ( s1, reg1 ); - emit_movb_reg_regmem ( s1, reg2 ); - emit_swapl_reg_reg ( s1, reg1 ); - } - break; - default: VG_(panic)("synth_mov_reg_litmem"); - } -} - - -static void synth_unaryop_reg ( Bool upd_cc, - Opcode opcode, Int size, - Int reg ) -{ - /* NB! opcode is a uinstr opcode, not an x86 one! 
*/ - switch (size) { - case 4: if (upd_cc) emit_get_eflags(); - emit_unaryopv_reg ( 4, opcode, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 2: if (upd_cc) emit_get_eflags(); - emit_unaryopv_reg ( 2, opcode, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 1: if (reg < 4) { - if (upd_cc) emit_get_eflags(); - emit_unaryopb_reg ( opcode, reg ); - if (upd_cc) emit_put_eflags(); - } else { - emit_swapl_reg_EAX ( reg ); - if (upd_cc) emit_get_eflags(); - emit_unaryopb_reg ( opcode, R_AL ); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_EAX ( reg ); - } - break; - default: VG_(panic)("synth_unaryop_reg"); - } -} - - - -static void synth_nonshiftop_reg_reg ( Bool upd_cc, - Opcode opcode, Int size, - Int reg1, Int reg2 ) -{ - /* NB! opcode is a uinstr opcode, not an x86 one! */ - switch (size) { - case 4: if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_reg_reg ( 4, opcode, reg1, reg2 ); - if (upd_cc) emit_put_eflags(); - break; - case 2: if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_reg_reg ( 2, opcode, reg1, reg2 ); - if (upd_cc) emit_put_eflags(); - break; - case 1: { /* Horrible ... */ - Int s1, s2; - /* Choose s1 and s2 to be x86 regs which we can talk about the - lowest 8 bits, ie either %eax, %ebx, %ecx or %edx. Make - sure s1 != s2 and that neither of them equal either reg1 or - reg2. Then use them as temporaries to make things work. 
*/ - if (reg1 < 4 && reg2 < 4) { - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_reg_reg(opcode, reg1, reg2); - if (upd_cc) emit_put_eflags(); - break; - } - for (s1 = 0; s1 == reg1 || s1 == reg2; s1++) ; - if (reg1 >= 4 && reg2 < 4) { - emit_swapl_reg_reg ( reg1, s1 ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_reg_reg(opcode, s1, reg2); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_reg ( reg1, s1 ); - break; - } - for (s2 = 0; s2 == reg1 || s2 == reg2 || s2 == s1; s2++) ; - if (reg1 < 4 && reg2 >= 4) { - emit_swapl_reg_reg ( reg2, s2 ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_reg_reg(opcode, reg1, s2); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_reg ( reg2, s2 ); - break; - } - if (reg1 >= 4 && reg2 >= 4 && reg1 != reg2) { - emit_swapl_reg_reg ( reg1, s1 ); - emit_swapl_reg_reg ( reg2, s2 ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_reg_reg(opcode, s1, s2); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_reg ( reg1, s1 ); - emit_swapl_reg_reg ( reg2, s2 ); - break; - } - if (reg1 >= 4 && reg2 >= 4 && reg1 == reg2) { - emit_swapl_reg_reg ( reg1, s1 ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_reg_reg(opcode, s1, s1); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_reg ( reg1, s1 ); - break; - } - VG_(panic)("synth_nonshiftopb_reg_reg"); - } - default: VG_(panic)("synth_nonshiftop_reg_reg"); - } -} - - -static void synth_nonshiftop_offregmem_reg ( - Bool upd_cc, - Opcode opcode, Int size, - Int off, Int areg, Int reg ) -{ - switch (size) { - case 4: - if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_offregmem_reg ( 4, opcode, off, areg, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 2: - if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_offregmem_reg ( 2, opcode, off, areg, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 1: - if (reg < 4) { - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_offregmem_reg ( opcode, off, areg, reg ); - if (upd_cc) emit_put_eflags(); - } else { - 
emit_swapl_reg_EAX ( reg ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_offregmem_reg ( opcode, off, areg, R_AL ); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_EAX ( reg ); - } - break; - default: - VG_(panic)("synth_nonshiftop_litmem_reg"); - } -} - - -static void synth_nonshiftop_lit_reg ( Bool upd_cc, - Opcode opcode, Int size, - UInt lit, Int reg ) -{ - switch (size) { - case 4: if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_lit_reg ( 4, opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 2: if (upd_cc) emit_get_eflags(); - emit_nonshiftopv_lit_reg ( 2, opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 1: if (reg < 4) { - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_lit_reg ( opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - } else { - emit_swapl_reg_EAX ( reg ); - if (upd_cc) emit_get_eflags(); - emit_nonshiftopb_lit_reg ( opcode, lit, R_AL ); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_EAX ( reg ); - } - break; - default: VG_(panic)("synth_nonshiftop_lit_reg"); - } -} - - -static void synth_push_reg ( Int size, Int reg ) -{ - switch (size) { - case 4: - emit_pushv_reg ( 4, reg ); - break; - case 2: - emit_pushv_reg ( 2, reg ); - break; - /* Pray that we don't have to generate this really cruddy bit of - code very often. Could do better, but can I be bothered? */ - case 1: - vg_assert(reg != R_ESP); /* duh */ - emit_add_lit_to_esp(-1); - if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); - emit_movb_AL_zeroESPmem(); - if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); - break; - default: - VG_(panic)("synth_push_reg"); - } -} - - -static void synth_pop_reg ( Int size, Int reg ) -{ - switch (size) { - case 4: - emit_popv_reg ( 4, reg ); - break; - case 2: - emit_popv_reg ( 2, reg ); - break; - case 1: - /* Same comment as above applies. 
*/ - vg_assert(reg != R_ESP); /* duh */ - if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); - emit_movb_zeroESPmem_AL(); - if (reg != R_EAX) emit_swapl_reg_EAX ( reg ); - emit_add_lit_to_esp(1); - break; - default: VG_(panic)("synth_pop_reg"); - } -} - - -static void synth_shiftop_reg_reg ( Bool upd_cc, - Opcode opcode, Int size, - Int regs, Int regd ) -{ - synth_push_reg ( size, regd ); - if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); - if (upd_cc) emit_get_eflags(); - switch (size) { - case 4: emit_shiftopv_cl_stack0 ( 4, opcode ); break; - case 2: emit_shiftopv_cl_stack0 ( 2, opcode ); break; - case 1: emit_shiftopb_cl_stack0 ( opcode ); break; - default: VG_(panic)("synth_shiftop_reg_reg"); - } - if (upd_cc) emit_put_eflags(); - if (regs != R_ECX) emit_swapl_reg_ECX ( regs ); - synth_pop_reg ( size, regd ); -} - - -static void synth_shiftop_lit_reg ( Bool upd_cc, - Opcode opcode, Int size, - UInt lit, Int reg ) -{ - switch (size) { - case 4: if (upd_cc) emit_get_eflags(); - emit_shiftopv_lit_reg ( 4, opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 2: if (upd_cc) emit_get_eflags(); - emit_shiftopv_lit_reg ( 2, opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - break; - case 1: if (reg < 4) { - if (upd_cc) emit_get_eflags(); - emit_shiftopb_lit_reg ( opcode, lit, reg ); - if (upd_cc) emit_put_eflags(); - } else { - emit_swapl_reg_EAX ( reg ); - if (upd_cc) emit_get_eflags(); - emit_shiftopb_lit_reg ( opcode, lit, R_AL ); - if (upd_cc) emit_put_eflags(); - emit_swapl_reg_EAX ( reg ); - } - break; - default: VG_(panic)("synth_nonshiftop_lit_reg"); - } -} - - -static void synth_setb_reg ( Int reg, Condcode cond ) -{ - emit_get_eflags(); - if (reg < 4) { - emit_setb_reg ( reg, cond ); - } else { - emit_swapl_reg_EAX ( reg ); - emit_setb_reg ( R_AL, cond ); - emit_swapl_reg_EAX ( reg ); - } -} - - -static void synth_fpu_regmem ( UChar first_byte, - UChar second_byte_masked, - Int reg ) -{ - emit_get_fpu_state(); - emit_fpu_regmem ( first_byte, 
second_byte_masked, reg ); - emit_put_fpu_state(); -} - - -static void synth_fpu_no_mem ( UChar first_byte, - UChar second_byte ) -{ - emit_get_fpu_state(); - emit_fpu_no_mem ( first_byte, second_byte ); - emit_put_fpu_state(); -} - - -static void synth_movl_reg_reg ( Int src, Int dst ) -{ - emit_movl_reg_reg ( src, dst ); -} - -static void synth_cmovl_reg_reg ( Condcode cond, Int src, Int dst ) -{ - emit_get_eflags(); - emit_jcondshort_delta ( invertCondition(cond), - 2 /* length of the next insn */ ); - emit_movl_reg_reg ( src, dst ); -} - - -/* Synthesise a minimal test (and which discards result) of reg32 - against lit. It's always safe do simply - emit_testv_lit_reg ( 4, lit, reg32 ) - but we try to do better when possible. -*/ -static void synth_minimal_test_lit_reg ( UInt lit, Int reg32 ) -{ - if ((lit & 0xFFFFFF00) == 0 && reg32 < 4) { - /* We can get away with a byte insn. */ - emit_testb_lit_reg ( lit, reg32 ); - } - else - if ((lit & 0xFFFF0000) == 0) { - /* Literal fits in 16 bits; do a word insn. */ - emit_testv_lit_reg ( 2, lit, reg32 ); - } - else { - /* Totally general ... */ - emit_testv_lit_reg ( 4, lit, reg32 ); - } -} - - -/*----------------------------------------------------*/ -/*--- Top level of the uinstr -> x86 translation. ---*/ -/*----------------------------------------------------*/ - -/* Return the byte offset from %ebp (ie, into baseBlock) - for the specified ArchReg or SpillNo. 
*/ - -static Int spillOrArchOffset ( Int size, Tag tag, UInt value ) -{ - if (tag == SpillNo) { - vg_assert(size == 4); - vg_assert(value >= 0 && value < VG_MAX_SPILLSLOTS); - return 4 * (value + VGOFF_(spillslots)); - } - if (tag == ArchReg) { - switch (value) { - case R_EAX: return 4 * VGOFF_(m_eax); - case R_ECX: return 4 * VGOFF_(m_ecx); - case R_EDX: return 4 * VGOFF_(m_edx); - case R_EBX: return 4 * VGOFF_(m_ebx); - case R_ESP: - if (size == 1) return 4 * VGOFF_(m_eax) + 1; - else return 4 * VGOFF_(m_esp); - case R_EBP: - if (size == 1) return 4 * VGOFF_(m_ecx) + 1; - else return 4 * VGOFF_(m_ebp); - case R_ESI: - if (size == 1) return 4 * VGOFF_(m_edx) + 1; - else return 4 * VGOFF_(m_esi); - case R_EDI: - if (size == 1) return 4 * VGOFF_(m_ebx) + 1; - else return 4 * VGOFF_(m_edi); - } - } - VG_(panic)("spillOrArchOffset"); -} - - -static Int eflagsOffset ( void ) -{ - return 4 * VGOFF_(m_eflags); -} - - -static Int shadowOffset ( Int arch ) -{ - switch (arch) { - case R_EAX: return 4 * VGOFF_(sh_eax); - case R_ECX: return 4 * VGOFF_(sh_ecx); - case R_EDX: return 4 * VGOFF_(sh_edx); - case R_EBX: return 4 * VGOFF_(sh_ebx); - case R_ESP: return 4 * VGOFF_(sh_esp); - case R_EBP: return 4 * VGOFF_(sh_ebp); - case R_ESI: return 4 * VGOFF_(sh_esi); - case R_EDI: return 4 * VGOFF_(sh_edi); - default: VG_(panic)( "shadowOffset"); - } -} - - -static Int shadowFlagsOffset ( void ) -{ - return 4 * VGOFF_(sh_eflags); -} - - -static void synth_LOADV ( Int sz, Int a_reg, Int tv_reg ) -{ - Int i, j, helper_offw; - Int pushed[VG_MAX_REALREGS+2]; - Int n_pushed; - switch (sz) { - case 4: helper_offw = VGOFF_(helperc_LOADV4); break; - case 2: helper_offw = VGOFF_(helperc_LOADV2); break; - case 1: helper_offw = VGOFF_(helperc_LOADV1); break; - default: VG_(panic)("synth_LOADV"); - } - n_pushed = 0; - for (i = 0; i < VG_MAX_REALREGS; i++) { - j = VG_(rankToRealRegNo) ( i ); - if (VG_CALLEE_SAVED(j)) continue; - if (j == tv_reg || j == a_reg) continue; - emit_pushv_reg ( 4, j 
); - pushed[n_pushed++] = j; - } - emit_pushv_reg ( 4, a_reg ); - pushed[n_pushed++] = a_reg; - vg_assert(n_pushed <= VG_MAX_REALREGS+1); - - synth_call_baseBlock_method ( False, helper_offw ); - /* Result is in %eax; we need to get it to tv_reg. */ - if (tv_reg != R_EAX) - emit_movv_reg_reg ( 4, R_EAX, tv_reg ); - - while (n_pushed > 0) { - n_pushed--; - if (pushed[n_pushed] == tv_reg) { - emit_add_lit_to_esp ( 4 ); - } else { - emit_popv_reg ( 4, pushed[n_pushed] ); - } - } -} - - -static void synth_STOREV ( Int sz, - Int tv_tag, Int tv_val, - Int a_reg ) -{ - Int i, j, helper_offw; - vg_assert(tv_tag == RealReg || tv_tag == Literal); - switch (sz) { - case 4: helper_offw = VGOFF_(helperc_STOREV4); break; - case 2: helper_offw = VGOFF_(helperc_STOREV2); break; - case 1: helper_offw = VGOFF_(helperc_STOREV1); break; - default: VG_(panic)("synth_STOREV"); - } - for (i = 0; i < VG_MAX_REALREGS; i++) { - j = VG_(rankToRealRegNo) ( i ); - if (VG_CALLEE_SAVED(j)) continue; - if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; - emit_pushv_reg ( 4, j ); - } - if (tv_tag == RealReg) { - emit_pushv_reg ( 4, tv_val ); - } else { - if (tv_val == VG_(extend_s_8to32)(tv_val)) - emit_pushl_lit8 ( VG_(extend_s_8to32)(tv_val) ); - else - emit_pushl_lit32(tv_val); - } - emit_pushv_reg ( 4, a_reg ); - synth_call_baseBlock_method ( False, helper_offw ); - emit_popv_reg ( 4, a_reg ); - if (tv_tag == RealReg) { - emit_popv_reg ( 4, tv_val ); - } else { - emit_add_lit_to_esp ( 4 ); - } - for (i = VG_MAX_REALREGS-1; i >= 0; i--) { - j = VG_(rankToRealRegNo) ( i ); - if (VG_CALLEE_SAVED(j)) continue; - if ((tv_tag == RealReg && j == tv_val) || j == a_reg) continue; - emit_popv_reg ( 4, j ); - } -} - - -static void synth_WIDEN_signed ( Int sz_src, Int sz_dst, Int reg ) -{ - if (sz_src == 1 && sz_dst == 4) { - emit_shiftopv_lit_reg ( 4, SHL, 24, reg ); - emit_shiftopv_lit_reg ( 4, SAR, 24, reg ); - } - else if (sz_src == 2 && sz_dst == 4) { - emit_shiftopv_lit_reg ( 4, SHL, 
16, reg ); - emit_shiftopv_lit_reg ( 4, SAR, 16, reg ); - } - else if (sz_src == 1 && sz_dst == 2) { - emit_shiftopv_lit_reg ( 2, SHL, 8, reg ); - emit_shiftopv_lit_reg ( 2, SAR, 8, reg ); - } - else - VG_(panic)("synth_WIDEN"); -} - - -static void synth_SETV ( Int sz, Int reg ) -{ - UInt val; - switch (sz) { - case 4: val = 0x00000000; break; - case 2: val = 0xFFFF0000; break; - case 1: val = 0xFFFFFF00; break; - case 0: val = 0xFFFFFFFE; break; - default: VG_(panic)("synth_SETV"); - } - emit_movv_lit_reg ( 4, val, reg ); -} - - -static void synth_TESTV ( Int sz, Int tag, Int val ) -{ - vg_assert(tag == ArchReg || tag == RealReg); - if (tag == ArchReg) { - switch (sz) { - case 4: - emit_testv_lit_offregmem ( - 4, 0xFFFFFFFF, shadowOffset(val), R_EBP ); - break; - case 2: - emit_testv_lit_offregmem ( - 4, 0x0000FFFF, shadowOffset(val), R_EBP ); - break; - case 1: - if (val < 4) { - emit_testv_lit_offregmem ( - 4, 0x000000FF, shadowOffset(val), R_EBP ); - } else { - emit_testv_lit_offregmem ( - 4, 0x0000FF00, shadowOffset(val-4), R_EBP ); - } - break; - case 0: - /* should never happen */ - default: - VG_(panic)("synth_TESTV(ArchReg)"); - } - } else { - switch (sz) { - case 4: - /* Works, but holds the entire 32-bit literal, hence - generating a 6-byte insn. We want to know if any bits - in the reg are set, but since this is for the full reg, - we might as well compare it against zero, which can be - done with a shorter insn. */ - /* synth_minimal_test_lit_reg ( 0xFFFFFFFF, val ); */ - emit_cmpl_zero_reg ( val ); - break; - case 2: - synth_minimal_test_lit_reg ( 0x0000FFFF, val ); - break; - case 1: - synth_minimal_test_lit_reg ( 0x000000FF, val ); - break; - case 0: - synth_minimal_test_lit_reg ( 0x00000001, val ); - break; - default: - VG_(panic)("synth_TESTV(RealReg)"); - } - } - emit_jcondshort_delta ( CondZ, 3 ); - synth_call_baseBlock_method ( - True, /* needed to guarantee that this insn is indeed 3 bytes long */ - (sz==4 ? 
VGOFF_(helper_value_check4_fail) - : (sz==2 ? VGOFF_(helper_value_check2_fail) - : sz == 1 ? VGOFF_(helper_value_check1_fail) - : VGOFF_(helper_value_check0_fail))) - ); -} - - -static void synth_GETV ( Int sz, Int arch, Int reg ) -{ - /* VG_(printf)("synth_GETV %d of Arch %s\n", sz, nameIReg(sz, arch)); */ - switch (sz) { - case 4: - emit_movv_offregmem_reg ( 4, shadowOffset(arch), R_EBP, reg ); - break; - case 2: - emit_movzwl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); - emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFF0000, reg ); - break; - case 1: - if (arch < 4) { - emit_movzbl_offregmem_reg ( shadowOffset(arch), R_EBP, reg ); - } else { - emit_movzbl_offregmem_reg ( shadowOffset(arch-4)+1, R_EBP, reg ); - } - emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFF00, reg ); - break; - default: - VG_(panic)("synth_GETV"); - } -} - - -static void synth_PUTV ( Int sz, Int srcTag, UInt lit_or_reg, Int arch ) -{ - if (srcTag == Literal) { - /* PUTV with a Literal is only ever used to set the corresponding - ArchReg to `all valid'. Should really be a kind of SETV. 
*/ - UInt lit = lit_or_reg; - switch (sz) { - case 4: - vg_assert(lit == 0x00000000); - emit_movv_lit_offregmem ( 4, 0x00000000, - shadowOffset(arch), R_EBP ); - break; - case 2: - vg_assert(lit == 0xFFFF0000); - emit_movv_lit_offregmem ( 2, 0x0000, - shadowOffset(arch), R_EBP ); - break; - case 1: - vg_assert(lit == 0xFFFFFF00); - if (arch < 4) { - emit_movb_lit_offregmem ( 0x00, - shadowOffset(arch), R_EBP ); - } else { - emit_movb_lit_offregmem ( 0x00, - shadowOffset(arch-4)+1, R_EBP ); - } - break; - default: - VG_(panic)("synth_PUTV(lit)"); - } - - } else { - - UInt reg; - vg_assert(srcTag == RealReg); - - if (sz == 1 && lit_or_reg >= 4) { - emit_swapl_reg_EAX ( lit_or_reg ); - reg = R_EAX; - } else { - reg = lit_or_reg; - } - - if (sz == 1) vg_assert(reg < 4); - - switch (sz) { - case 4: - emit_movv_reg_offregmem ( 4, reg, - shadowOffset(arch), R_EBP ); - break; - case 2: - emit_movv_reg_offregmem ( 2, reg, - shadowOffset(arch), R_EBP ); - break; - case 1: - if (arch < 4) { - emit_movb_reg_offregmem ( reg, - shadowOffset(arch), R_EBP ); - } else { - emit_movb_reg_offregmem ( reg, - shadowOffset(arch-4)+1, R_EBP ); - } - break; - default: - VG_(panic)("synth_PUTV(reg)"); - } - - if (sz == 1 && lit_or_reg >= 4) { - emit_swapl_reg_EAX ( lit_or_reg ); - } - } -} - - -static void synth_GETVF ( Int reg ) -{ - emit_movv_offregmem_reg ( 4, shadowFlagsOffset(), R_EBP, reg ); - /* paranoia only; should be unnecessary ... */ - /* emit_nonshiftopv_lit_reg ( 4, OR, 0xFFFFFFFE, reg ); */ -} - - -static void synth_PUTVF ( UInt reg ) -{ - emit_movv_reg_offregmem ( 4, reg, shadowFlagsOffset(), R_EBP ); -} - - -static void synth_handle_esp_assignment ( Int reg ) -{ - emit_pushal(); - emit_pushv_reg ( 4, reg ); - synth_call_baseBlock_method ( False, VGOFF_(handle_esp_assignment) ); - emit_add_lit_to_esp ( 4 ); - emit_popal(); -} - - -static void synth_fpu_mem_check_actions ( Bool isWrite, - Int size, Int a_reg ) -{ - Int helper_offw - = isWrite ? 
VGOFF_(fpu_write_check) - : VGOFF_(fpu_read_check); - emit_pushal(); - emit_pushl_lit8 ( size ); - emit_pushv_reg ( 4, a_reg ); - synth_call_baseBlock_method ( False, helper_offw ); - emit_add_lit_to_esp ( 8 ); - emit_popal(); -} - - -#if 0 -/* FixMe. Useful for debugging. */ -void VG_(oink) ( Int n ) -{ - VG_(printf)("OiNk(%d): ", n ); - VG_(show_reg_tags)( &VG_(m_shadow) ); -} - -static void synth_OINK ( Int n ) -{ - emit_pushal(); - emit_movv_lit_reg ( 4, n, R_EBP ); - emit_pushl_reg ( R_EBP ); - emit_movv_lit_reg ( 4, (Addr)&VG_(oink), R_EBP ); - emit_call_reg ( R_EBP ); - emit_add_lit_to_esp ( 4 ); - emit_popal(); -} -#endif - -static void synth_TAG1_op ( VgTagOp op, Int reg ) -{ - switch (op) { - - /* Scheme is - neg %reg -- CF = %reg==0 ? 0 : 1 - sbbl %reg, %reg -- %reg = -CF - or 0xFFFFFFFE, %reg -- invalidate all bits except lowest - */ - case VgT_PCast40: - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); - break; - case VgT_PCast20: - emit_unaryopv_reg(2, NEG, reg); - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); - break; - case VgT_PCast10: - if (reg >= 4) { - emit_swapl_reg_EAX(reg); - emit_unaryopb_reg(NEG, R_EAX); - emit_swapl_reg_EAX(reg); - } else { - emit_unaryopb_reg(NEG, reg); - } - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFFFE, reg); - break; - - /* Scheme is - andl $1, %reg -- %reg is 0 or 1 - negl %reg -- %reg is 0 or 0xFFFFFFFF - and possibly an OR to invalidate unused bits. 
- */ - case VgT_PCast04: - emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); - emit_unaryopv_reg(4, NEG, reg); - break; - case VgT_PCast02: - emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); - break; - case VgT_PCast01: - emit_nonshiftopv_lit_reg(4, AND, 0x00000001, reg); - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); - break; - - /* Scheme is - shl $24, %reg -- make irrelevant bits disappear - negl %reg -- CF = %reg==0 ? 0 : 1 - sbbl %reg, %reg -- %reg = -CF - and possibly an OR to invalidate unused bits. - */ - case VgT_PCast14: - emit_shiftopv_lit_reg(4, SHL, 24, reg); - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - break; - case VgT_PCast12: - emit_shiftopv_lit_reg(4, SHL, 24, reg); - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); - break; - case VgT_PCast11: - emit_shiftopv_lit_reg(4, SHL, 24, reg); - emit_unaryopv_reg(4, NEG, reg); - emit_nonshiftopv_reg_reg(4, SBB, reg, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, reg); - break; - - /* We use %edi (a non-allocable reg) as a temporary: - movl %reg, %edi - negl %edi - orl %edi, %reg - This sequence turns out to be correct regardless of the - operation width. - */ - case VgT_Left4: - case VgT_Left2: - case VgT_Left1: - vg_assert(reg != R_EDI); - emit_movv_reg_reg(4, reg, R_EDI); - emit_unaryopv_reg(4, NEG, R_EDI); - emit_nonshiftopv_reg_reg(4, OR, R_EDI, reg); - break; - - /* These are all fairly obvious; do the op and then, if - necessary, invalidate unused bits. 
*/ - case VgT_SWiden14: - emit_shiftopv_lit_reg(4, SHL, 24, reg); - emit_shiftopv_lit_reg(4, SAR, 24, reg); - break; - case VgT_SWiden24: - emit_shiftopv_lit_reg(4, SHL, 16, reg); - emit_shiftopv_lit_reg(4, SAR, 16, reg); - break; - case VgT_SWiden12: - emit_shiftopv_lit_reg(4, SHL, 24, reg); - emit_shiftopv_lit_reg(4, SAR, 24, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); - break; - case VgT_ZWiden14: - emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); - break; - case VgT_ZWiden24: - emit_nonshiftopv_lit_reg(4, AND, 0x0000FFFF, reg); - break; - case VgT_ZWiden12: - emit_nonshiftopv_lit_reg(4, AND, 0x000000FF, reg); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, reg); - break; - - default: - VG_(panic)("synth_TAG1_op"); - } -} - - -static void synth_TAG2_op ( VgTagOp op, Int regs, Int regd ) -{ - switch (op) { - - /* UifU is implemented by OR, since 1 means Undefined. */ - case VgT_UifU4: - case VgT_UifU2: - case VgT_UifU1: - case VgT_UifU0: - emit_nonshiftopv_reg_reg(4, OR, regs, regd); - break; - - /* DifD is implemented by AND, since 0 means Defined. */ - case VgT_DifD4: - case VgT_DifD2: - case VgT_DifD1: - emit_nonshiftopv_reg_reg(4, AND, regs, regd); - break; - - /* ImproveAND(value, tags) = value OR tags. - Defined (0) value 0s give defined (0); all other -> undefined (1). - value is in regs; tags is in regd. - Be paranoid and invalidate unused bits; I don't know whether - or not this is actually necessary. */ - case VgT_ImproveAND4_TQ: - emit_nonshiftopv_reg_reg(4, OR, regs, regd); - break; - case VgT_ImproveAND2_TQ: - emit_nonshiftopv_reg_reg(4, OR, regs, regd); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); - break; - case VgT_ImproveAND1_TQ: - emit_nonshiftopv_reg_reg(4, OR, regs, regd); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); - break; - - /* ImproveOR(value, tags) = (not value) OR tags. - Defined (0) value 1s give defined (0); all other -> undefined (1). - value is in regs; tags is in regd. 
- To avoid trashing value, this is implemented (re de Morgan) as - not (value AND (not tags)) - Be paranoid and invalidate unused bits; I don't know whether - or not this is actually necessary. */ - case VgT_ImproveOR4_TQ: - emit_unaryopv_reg(4, NOT, regd); - emit_nonshiftopv_reg_reg(4, AND, regs, regd); - emit_unaryopv_reg(4, NOT, regd); - break; - case VgT_ImproveOR2_TQ: - emit_unaryopv_reg(4, NOT, regd); - emit_nonshiftopv_reg_reg(4, AND, regs, regd); - emit_unaryopv_reg(4, NOT, regd); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFF0000, regd); - break; - case VgT_ImproveOR1_TQ: - emit_unaryopv_reg(4, NOT, regd); - emit_nonshiftopv_reg_reg(4, AND, regs, regd); - emit_unaryopv_reg(4, NOT, regd); - emit_nonshiftopv_lit_reg(4, OR, 0xFFFFFF00, regd); - break; - - default: - VG_(panic)("synth_TAG2_op"); - } -} - -/*----------------------------------------------------*/ -/*--- Generate code for a single UInstr. ---*/ -/*----------------------------------------------------*/ - -static void emitUInstr ( Int i, UInstr* u ) -{ - if (dis) - VG_(ppUInstr)(i, u); - -# if 0 - if (0&& VG_(translations_done) >= 600) { - Bool old_dis = dis; - dis = False; - synth_OINK(i); - dis = old_dis; - } -# endif - - switch (u->opcode) { - - case NOP: case CALLM_S: case CALLM_E: break; - - case INCEIP: { - vg_assert(u->tag1 == Lit16); - emit_addlit8_offregmem ( u->val1, R_EBP, 4 * VGOFF_(m_eip) ); - break; - } - - case LEA1: { - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - emit_lea_litreg_reg ( u->lit32, u->val1, u->val2 ); - break; - } - - case LEA2: { - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - vg_assert(u->tag3 == RealReg); - emit_lea_sib_reg ( u->lit32, u->extra4b, - u->val1, u->val2, u->val3 ); - break; - } - - case WIDEN: { - vg_assert(u->tag1 == RealReg); - if (u->signed_widen) { - synth_WIDEN_signed ( u->extra4b, u->size, u->val1 ); - } else { - /* no need to generate any code. 
*/ - } - break; - } - - case SETV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg); - synth_SETV ( u->size, u->val1 ); - break; - } - - case STOREV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg || u->tag1 == Literal); - vg_assert(u->tag2 == RealReg); - synth_STOREV ( u->size, u->tag1, - u->tag1==Literal ? u->lit32 : u->val1, - u->val2 ); - break; - } - - case STORE: { - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - synth_mov_reg_memreg ( u->size, u->val1, u->val2 ); - /* No longer possible, but retained for illustrative purposes. - if (u->smc_check) - synth_orig_code_write_check ( u->size, u->val2 ); - */ - break; - } - - case LOADV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - if (0 && VG_(clo_instrument)) - emit_AMD_prefetch_reg ( u->val1 ); - synth_LOADV ( u->size, u->val1, u->val2 ); - break; - } - - case LOAD: { - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - synth_mov_regmem_reg ( u->size, u->val1, u->val2 ); - break; - } - - case TESTV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg || u->tag1 == ArchReg); - synth_TESTV(u->size, u->tag1, u->val1); - break; - } - - case GETV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == ArchReg); - vg_assert(u->tag2 == RealReg); - synth_GETV(u->size, u->val1, u->val2); - break; - } - - case GETVF: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg); - vg_assert(u->size == 0); - synth_GETVF(u->val1); - break; - } - - case PUTV: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg || u->tag1 == Literal); - vg_assert(u->tag2 == ArchReg); - synth_PUTV(u->size, u->tag1, - u->tag1==Literal ? 
u->lit32 : u->val1, - u->val2 ); - break; - } - - case PUTVF: { - vg_assert(VG_(clo_instrument)); - vg_assert(u->tag1 == RealReg); - vg_assert(u->size == 0); - synth_PUTVF(u->val1); - break; - } - - case GET: { - vg_assert(u->tag1 == ArchReg || u->tag1 == SpillNo); - vg_assert(u->tag2 == RealReg); - synth_mov_offregmem_reg ( - u->size, - spillOrArchOffset( u->size, u->tag1, u->val1 ), - R_EBP, - u->val2 - ); - break; - } - - case PUT: { - vg_assert(u->tag2 == ArchReg || u->tag2 == SpillNo); - vg_assert(u->tag1 == RealReg); - if (u->tag2 == ArchReg - && u->val2 == R_ESP - && u->size == 4 - && VG_(clo_instrument)) { - synth_handle_esp_assignment ( u->val1 ); - } - synth_mov_reg_offregmem ( - u->size, - u->val1, - spillOrArchOffset( u->size, u->tag2, u->val2 ), - R_EBP - ); - break; - } - - case GETF: { - vg_assert(u->size == 2 || u->size == 4); - vg_assert(u->tag1 == RealReg); - synth_mov_offregmem_reg ( - u->size, - eflagsOffset(), - R_EBP, - u->val1 - ); - break; - } - - case PUTF: { - vg_assert(u->size == 2 || u->size == 4); - vg_assert(u->tag1 == RealReg); - synth_mov_reg_offregmem ( - u->size, - u->val1, - eflagsOffset(), - R_EBP - ); - break; - } - - case MOV: { - vg_assert(u->tag1 == RealReg || u->tag1 == Literal); - vg_assert(u->tag2 == RealReg); - switch (u->tag1) { - case RealReg: vg_assert(u->size == 4); - if (u->val1 != u->val2) - synth_movl_reg_reg ( u->val1, u->val2 ); - break; - case Literal: synth_mov_lit_reg ( u->size, u->lit32, u->val2 ); - break; - default: VG_(panic)("emitUInstr:mov"); - } - break; - } - - case SBB: - case ADC: - case XOR: - case OR: - case AND: - case SUB: - case ADD: { - vg_assert(u->tag2 == RealReg); - switch (u->tag1) { - case Literal: synth_nonshiftop_lit_reg ( - VG_(anyFlagUse)(u), - u->opcode, u->size, u->lit32, u->val2 ); - break; - case RealReg: synth_nonshiftop_reg_reg ( - VG_(anyFlagUse)(u), - u->opcode, u->size, u->val1, u->val2 ); - break; - case ArchReg: synth_nonshiftop_offregmem_reg ( - VG_(anyFlagUse)(u), - 
u->opcode, u->size, - spillOrArchOffset( u->size, u->tag1, u->val1 ), - R_EBP, - u->val2 ); - break; - default: VG_(panic)("emitUInstr:non-shift-op"); - } - break; - } - - case RCR: - case RCL: - case ROR: - case ROL: - case SAR: - case SHR: - case SHL: { - vg_assert(u->tag2 == RealReg); - switch (u->tag1) { - case Literal: synth_shiftop_lit_reg ( - VG_(anyFlagUse)(u), - u->opcode, u->size, u->lit32, u->val2 ); - break; - case RealReg: synth_shiftop_reg_reg ( - VG_(anyFlagUse)(u), - u->opcode, u->size, u->val1, u->val2 ); - break; - default: VG_(panic)("emitUInstr:non-shift-op"); - } - break; - } - - case INC: - case DEC: - case NEG: - case NOT: - vg_assert(u->tag1 == RealReg); - synth_unaryop_reg ( - VG_(anyFlagUse)(u), u->opcode, u->size, u->val1 ); - break; - - case BSWAP: - vg_assert(u->tag1 == RealReg); - vg_assert(u->size == 4); - vg_assert(!VG_(anyFlagUse)(u)); - emit_bswapl_reg ( u->val1 ); - break; - - case CMOV: - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - vg_assert(u->cond != CondAlways); - vg_assert(u->size == 4); - synth_cmovl_reg_reg ( u->cond, u->val1, u->val2 ); - break; - - case JMP: { - vg_assert(u->tag2 == NoValue); - vg_assert(u->tag1 == RealReg || u->tag1 == Literal); - if (u->cond == CondAlways) { - switch (u->tag1) { - case RealReg: - synth_jmp_reg ( u->val1, u->jmpkind ); - break; - case Literal: - synth_jmp_lit ( u->lit32, u->jmpkind ); - break; - default: - VG_(panic)("emitUInstr(JMP, unconditional, default)"); - break; - } - } else { - switch (u->tag1) { - case RealReg: - VG_(panic)("emitUInstr(JMP, conditional, RealReg)"); - break; - case Literal: - vg_assert(u->jmpkind == JmpBoring); - synth_jcond_lit ( u->cond, u->lit32 ); - break; - default: - VG_(panic)("emitUInstr(JMP, conditional, default)"); - break; - } - } - break; - } - - case JIFZ: - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == Literal); - vg_assert(u->size == 4); - synth_jmp_ifzero_reg_lit ( u->val1, u->lit32 ); - break; - - case TAG1: - 
synth_TAG1_op ( u->val3, u->val1 ); - break; - - case TAG2: - if (u->val3 != VgT_DebugFn) { - synth_TAG2_op ( u->val3, u->val1, u->val2 ); - } else { - /* Assume a call to VgT_DebugFn passing both args - and placing the result back in the second. */ - Int j, k; - /* u->val2 is the reg into which the result is written. So - don't save/restore it. And it can be used at a temp for - the call target, too. Since %eax is used for the return - value from the C procedure, it is preserved only by - virtue of not being mentioned as a VG_CALLEE_SAVED reg. */ - for (k = 0; k < VG_MAX_REALREGS; k++) { - j = VG_(rankToRealRegNo) ( k ); - if (VG_CALLEE_SAVED(j)) continue; - if (j == u->val2) continue; - emit_pushv_reg ( 4, j ); - } - emit_pushv_reg(4, u->val2); - emit_pushv_reg(4, u->val1); - emit_movv_lit_reg ( 4, (UInt)(&VG_(DebugFn)), u->val2 ); - emit_call_reg ( u->val2 ); - if (u->val2 != R_EAX) - emit_movv_reg_reg ( 4, R_EAX, u->val2 ); - /* nuke args */ - emit_add_lit_to_esp(8); - for (k = VG_MAX_REALREGS-1; k >= 0; k--) { - j = VG_(rankToRealRegNo) ( k ); - if (VG_CALLEE_SAVED(j)) continue; - if (j == u->val2) continue; - emit_popv_reg ( 4, j ); - } - } - break; - - case PUSH: - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == NoValue); - emit_pushv_reg ( 4, u->val1 ); - break; - - case POP: - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == NoValue); - emit_popv_reg ( 4, u->val1 ); - break; - - case CALLM: - vg_assert(u->tag1 == Lit16); - vg_assert(u->tag2 == NoValue); - vg_assert(u->size == 0); - if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) - emit_get_eflags(); - synth_call_baseBlock_method ( False, u->val1 ); - if (u->flags_w != FlagsEmpty) - emit_put_eflags(); - break; - - case CLEAR: - vg_assert(u->tag1 == Lit16); - vg_assert(u->tag2 == NoValue); - emit_add_lit_to_esp ( u->val1 ); - break; - - case CC2VAL: - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == NoValue); - vg_assert(VG_(anyFlagUse)(u)); - synth_setb_reg ( u->val1, u->cond ); - 
break; - - /* We assume that writes to memory done by FPU_Ws are not going - to be used to create new code, so there's no orig-code-write - checks done by default. */ - case FPU_R: - case FPU_W: - vg_assert(u->tag1 == Lit16); - vg_assert(u->tag2 == RealReg); - if (VG_(clo_instrument)) - synth_fpu_mem_check_actions ( - u->opcode==FPU_W, u->size, u->val2 ); - synth_fpu_regmem ( (u->val1 >> 8) & 0xFF, - u->val1 & 0xFF, - u->val2 ); - /* No longer possible, but retained for illustrative purposes. - if (u->opcode == FPU_W && u->smc_check) - synth_orig_code_write_check ( u->size, u->val2 ); - */ - break; - - case FPU: - vg_assert(u->tag1 == Lit16); - vg_assert(u->tag2 == NoValue); - if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) - emit_get_eflags(); - synth_fpu_no_mem ( (u->val1 >> 8) & 0xFF, - u->val1 & 0xFF ); - if (u->flags_w != FlagsEmpty) - emit_put_eflags(); - break; - - default: - VG_(printf)("emitUInstr: unhandled insn:\n"); - VG_(ppUInstr)(0,u); - VG_(panic)("emitUInstr: unimplemented opcode"); - } - -} - - -/* Emit x86 for the ucode in cb, returning the address of the - generated code and setting *nbytes to its size. */ -UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ) -{ - Int i; - emitted_code_used = 0; - emitted_code_size = 500; /* reasonable initial size */ - emitted_code = VG_(jitmalloc)(emitted_code_size); - - if (dis) VG_(printf)("Generated code:\n"); - - for (i = 0; i < cb->used; i++) { - if (cb->instrs[i].opcode != NOP) { - UInstr* u = &cb->instrs[i]; -# if 1 - /* Check on the sanity of this insn. */ - Bool sane = VG_(saneUInstr)( False, u ); - if (!sane) { - VG_(printf)("\ninsane instruction\n"); - VG_(ppUInstr)( i, u ); - } - vg_assert(sane); -# endif -# if 0 - /* Pass args to TAG1/TAG2 to vg_DebugFn for sanity checking. - Requires a suitable definition of vg_DebugFn. 
*/ - if (u->opcode == TAG1) { - UInstr t1; - vg_assert(u->tag1 == RealReg); - VG_(emptyUInstr)( &t1 ); - t1.opcode = TAG2; - t1.tag1 = t1.tag2 = RealReg; - t1.val1 = t1.val2 = u->val1; - t1.tag3 = Lit16; - t1.val3 = VgT_DebugFn; - emitUInstr( i, &t1 ); - } - if (u->opcode == TAG2) { - UInstr t1; - vg_assert(u->tag1 == RealReg); - vg_assert(u->tag2 == RealReg); - VG_(emptyUInstr)( &t1 ); - t1.opcode = TAG2; - t1.tag1 = t1.tag2 = RealReg; - t1.val1 = t1.val2 = u->val1; - t1.tag3 = Lit16; - t1.val3 = VgT_DebugFn; - if (u->val3 == VgT_UifU1 || u->val3 == VgT_UifU2 - || u->val3 == VgT_UifU4 || u->val3 == VgT_DifD1 - || u->val3 == VgT_DifD2 || u->val3 == VgT_DifD4) - emitUInstr( i, &t1 ); - t1.val1 = t1.val2 = u->val2; - emitUInstr( i, &t1 ); - } -# endif - emitUInstr( i, u ); - } - } - - /* Returns a pointer to the emitted code. This will have to be - copied by the caller into the translation cache, and then freed - using VG_(jitfree). */ - *nbytes = emitted_code_used; - return emitted_code; -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_from_ucode.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_helpers.S b/coregrind/vg_helpers.S deleted file mode 100644 index 82627377d6..0000000000 --- a/coregrind/vg_helpers.S +++ /dev/null @@ -1,571 +0,0 @@ - -##--------------------------------------------------------------------## -##--- Support routines for the JITter output. ---## -##--- vg_helpers.S ---## -##--------------------------------------------------------------------## - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. 
- - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_constants.h" - -/* ------------------ SIMULATED CPU HELPERS ------------------ */ -/* Stubs for returns which we want to catch: signal returns - and pthread returns. In the latter case, the thread's - return value is in %EAX, so we pass this as the first argument - to the request. In both cases we use the user request mechanism. - You need to read the definition of VALGRIND_MAGIC_SEQUENCE - in valgrind.h to make sense of this. 
-*/ -.global VG_(signalreturn_bogusRA) -VG_(signalreturn_bogusRA): - subl $20, %esp # allocate arg block - movl %esp, %edx # %edx == &_zzq_args[0] - movl $VG_USERREQ__SIGNAL_RETURNS, 0(%edx) # request - movl $0, 4(%edx) # arg1 - movl $0, 8(%edx) # arg2 - movl $0, 12(%edx) # arg3 - movl $0, 16(%edx) # arg4 - movl %edx, %eax - # and now the magic sequence itself: - roll $29, %eax - roll $3, %eax - rorl $27, %eax - rorl $5, %eax - roll $13, %eax - roll $19, %eax - # should never get here - pushl $signalreturn_bogusRA_panic_msg - call VG_(panic) - -.data -signalreturn_bogusRA_panic_msg: -.ascii "vg_signalreturn_bogusRA: VG_USERREQ__SIGNAL_RETURNS was missed" -.byte 0 -.text - - - - -/* ------------------ REAL CPU HELPERS ------------------ */ -/* The rest of this lot run on the real CPU. */ - -/* Various helper routines, for instructions which are just too - darn tedious for the JITter to output code in-line: - - * integer division - * integer multiplication - * setting and getting obscure eflags - * double-length shifts - - All routines use a standard calling convention designed for - calling from translations, in which the incoming args are - underneath the return address, the callee saves _all_ registers, - and the incoming parameters can be modified, to return results. -*/ - - -.global VG_(helper_value_check0_fail) -VG_(helper_value_check0_fail): - pushal - call VG_(helperc_value_check0_fail) - popal - ret - -.global VG_(helper_value_check1_fail) -VG_(helper_value_check1_fail): - pushal - call VG_(helperc_value_check1_fail) - popal - ret - -.global VG_(helper_value_check2_fail) -VG_(helper_value_check2_fail): - pushal - call VG_(helperc_value_check2_fail) - popal - ret - -.global VG_(helper_value_check4_fail) -VG_(helper_value_check4_fail): - pushal - call VG_(helperc_value_check4_fail) - popal - ret - - -/* Fetch the time-stamp-ctr reg. 
- On entry: - dummy, replaced by %EAX value - dummy, replaced by %EDX value - RA <- %esp -*/ -.global VG_(helper_RDTSC) -VG_(helper_RDTSC): - pushl %eax - pushl %edx - rdtsc - movl %edx, 12(%esp) - movl %eax, 16(%esp) - popl %edx - popl %eax - ret - - -/* Do the CPUID instruction. - On entry: - dummy, replaced by %EAX value - dummy, replaced by %EBX value - dummy, replaced by %ECX value - dummy, replaced by %EDX value - RA <- %esp - - As emulating a real CPUID is kinda hard, as it - has to return different values depending on EAX, - we just pretend to not support CPUID at all until - it becomes a problem. This will for sure disable - all MMX / 3dnow checks so they don't bother us - with code we don't understand. (Dirk ) - - http://www.sandpile.org/ia32/cpuid.htm - - (Later: we instead pretend to be like Werner's P54C P133, that is - an original pre-MMX Pentium). - cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69 - cpuid words (1): 0x52b 0x0 0x0 0x1bf -*/ -.global VG_(helper_CPUID) -VG_(helper_CPUID): - pushl %eax - pushl %ebx - pushl %ecx - pushl %edx - movl 32(%esp), %eax -/* - cpuid -*/ -/* - xor %eax,%eax - xor %ebx,%ebx - xor %ecx,%ecx - xor %edx,%edx -*/ - cmpl $0, %eax - jz cpuid__0 - movl $0x52b, %eax - movl $0x0, %ebx - movl $0x0, %ecx - movl $0x1bf, %edx - jmp cpuid__99 -cpuid__0: - movl $0x1, %eax - movl $0x756e6547, %ebx - movl $0x6c65746e, %ecx - movl $0x49656e69, %edx -cpuid__99: - - movl %edx, 20(%esp) - movl %ecx, 24(%esp) - movl %ebx, 28(%esp) - movl %eax, 32(%esp) - popl %edx - popl %ecx - popl %ebx - popl %eax - ret - - -/* Fetch the FPU status register. - On entry: - dummy, replaced by result - RA <- %esp -*/ -.global VG_(helper_fstsw_AX) -VG_(helper_fstsw_AX): - pushl %eax - pushl %esi - movl VGOFF_(m_fpustate), %esi - frstor (%ebp, %esi, 4) - fstsw %ax - popl %esi - movw %ax, 8(%esp) - popl %eax - ret - - -/* Copy %ah into %eflags. 
- On entry: - value of %eax - RA <- %esp -*/ -.global VG_(helper_SAHF) -VG_(helper_SAHF): - pushl %eax - movl 8(%esp), %eax - sahf - popl %eax - ret - - -/* Do %al = DAS(%al). Note that the passed param has %AL as the least - significant 8 bits, since it was generated with GETB %AL, - some-temp. Fortunately %al is the least significant 8 bits of - %eax anyway, which is why it's safe to work with %eax as a - whole. - - On entry: - value of %eax - RA <- %esp -*/ -.global VG_(helper_DAS) -VG_(helper_DAS): - pushl %eax - movl 8(%esp), %eax - das - movl %eax, 8(%esp) - popl %eax - ret - - -/* Similarly, do %al = DAA(%al). */ -.global VG_(helper_DAA) -VG_(helper_DAA): - pushl %eax - movl 8(%esp), %eax - daa - movl %eax, 8(%esp) - popl %eax - ret - - -/* Bit scan forwards/reverse. Sets flags (??). - On entry: - value, replaced by result - RA <- %esp -*/ -.global VG_(helper_bsr) -VG_(helper_bsr): - pushl %eax - movl 12(%esp), %eax - bsrl 8(%esp), %eax - movl %eax, 12(%esp) - popl %eax - ret - -.global VG_(helper_bsf) -VG_(helper_bsf): - pushl %eax - movl 12(%esp), %eax - bsfl 8(%esp), %eax - movl %eax, 12(%esp) - popl %eax - ret - - -/* 32-bit double-length shift left/right. 
- On entry: - amount - src - dst - RA <- %esp -*/ -.global VG_(helper_shldl) -VG_(helper_shldl): - pushl %eax - pushl %ebx - pushl %ecx - - movb 24(%esp), %cl - movl 20(%esp), %ebx - movl 16(%esp), %eax - shldl %cl, %ebx, %eax - movl %eax, 16(%esp) - - popl %ecx - popl %ebx - popl %eax - ret - -.global VG_(helper_shldw) -VG_(helper_shldw): - pushl %eax - pushl %ebx - pushl %ecx - - movb 24(%esp), %cl - movw 20(%esp), %bx - movw 16(%esp), %ax - shldw %cl, %bx, %ax - movw %ax, 16(%esp) - - popl %ecx - popl %ebx - popl %eax - ret - -.global VG_(helper_shrdl) -VG_(helper_shrdl): - pushl %eax - pushl %ebx - pushl %ecx - - movb 24(%esp), %cl - movl 20(%esp), %ebx - movl 16(%esp), %eax - shrdl %cl, %ebx, %eax - movl %eax, 16(%esp) - - popl %ecx - popl %ebx - popl %eax - ret - -.global VG_(helper_shrdw) -VG_(helper_shrdw): - pushl %eax - pushl %ebx - pushl %ecx - - movb 24(%esp), %cl - movw 20(%esp), %bx - movw 16(%esp), %ax - shrdw %cl, %bx, %ax - movw %ax, 16(%esp) - - popl %ecx - popl %ebx - popl %eax - ret - - -/* Get the direction flag, and return either 1 or -1. */ -.global VG_(helper_get_dirflag) -VG_(helper_get_dirflag): - pushfl - pushl %eax - - pushfl - popl %eax - shrl $10, %eax - andl $1, %eax - jnz L1 - movl $1, %eax - jmp L2 -L1: movl $-1, %eax -L2: movl %eax, 12(%esp) - - popl %eax - popfl - ret - - -/* Clear/set the direction flag. */ -.global VG_(helper_CLD) -VG_(helper_CLD): - cld - ret - -.global VG_(helper_STD) -VG_(helper_STD): - std - ret - -/* Clear/set the carry flag. */ -.global VG_(helper_CLC) -VG_(helper_CLC): - clc - ret - -.global VG_(helper_STC) -VG_(helper_STC): - stc - ret - -/* Signed 32-to-64 multiply. */ -.globl VG_(helper_imul_32_64) -VG_(helper_imul_32_64): - pushl %eax - pushl %edx - movl 16(%esp), %eax - imull 12(%esp) - movl %eax, 16(%esp) - movl %edx, 12(%esp) - popl %edx - popl %eax - ret - -/* Signed 16-to-32 multiply. 
*/ -.globl VG_(helper_imul_16_32) -VG_(helper_imul_16_32): - pushl %eax - pushl %edx - movw 16(%esp), %ax - imulw 12(%esp) - movw %ax, 16(%esp) - movw %dx, 12(%esp) - popl %edx - popl %eax - ret - -/* Signed 8-to-16 multiply. */ -.globl VG_(helper_imul_8_16) -VG_(helper_imul_8_16): - pushl %eax - pushl %edx - movb 16(%esp), %al - imulb 12(%esp) - movw %ax, 16(%esp) - popl %edx - popl %eax - ret - - - - - - -/* Unsigned 32-to-64 multiply. */ -.globl VG_(helper_mul_32_64) -VG_(helper_mul_32_64): - pushl %eax - pushl %edx - movl 16(%esp), %eax - mull 12(%esp) - movl %eax, 16(%esp) - movl %edx, 12(%esp) - popl %edx - popl %eax - ret - -/* Unsigned 16-to-32 multiply. */ -.globl VG_(helper_mul_16_32) -VG_(helper_mul_16_32): - pushl %eax - pushl %edx - movw 16(%esp), %ax - mulw 12(%esp) - movw %ax, 16(%esp) - movw %dx, 12(%esp) - popl %edx - popl %eax - ret - -/* Unsigned 8-to-16 multiply. */ -.globl VG_(helper_mul_8_16) -VG_(helper_mul_8_16): - pushl %eax - pushl %edx - movb 16(%esp), %al - mulb 12(%esp) - movw %ax, 16(%esp) - popl %edx - popl %eax - ret - - - - -/* Unsigned 64-into-32 divide. */ -.globl VG_(helper_div_64_32) -VG_(helper_div_64_32): - pushl %eax - pushl %edx - movl 16(%esp),%eax - movl 12(%esp),%edx - divl 20(%esp) - movl %eax,16(%esp) - movl %edx,12(%esp) - popl %edx - popl %eax - ret - -/* Signed 64-into-32 divide. */ -.globl VG_(helper_idiv_64_32) -VG_(helper_idiv_64_32): - pushl %eax - pushl %edx - movl 16(%esp),%eax - movl 12(%esp),%edx - idivl 20(%esp) - movl %eax,16(%esp) - movl %edx,12(%esp) - popl %edx - popl %eax - ret - -/* Unsigned 32-into-16 divide. */ -.globl VG_(helper_div_32_16) -VG_(helper_div_32_16): - pushl %eax - pushl %edx - movw 16(%esp),%ax - movw 12(%esp),%dx - divw 20(%esp) - movw %ax,16(%esp) - movw %dx,12(%esp) - popl %edx - popl %eax - ret - -/* Signed 32-into-16 divide. 
*/ -.globl VG_(helper_idiv_32_16) -VG_(helper_idiv_32_16): - pushl %eax - pushl %edx - movw 16(%esp),%ax - movw 12(%esp),%dx - idivw 20(%esp) - movw %ax,16(%esp) - movw %dx,12(%esp) - popl %edx - popl %eax - ret - -/* Unsigned 16-into-8 divide. */ -.globl VG_(helper_div_16_8) -VG_(helper_div_16_8): - pushl %eax - movw 12(%esp),%ax - divb 16(%esp) - movb %ah,12(%esp) - movb %al,8(%esp) - popl %eax - ret - -/* Signed 16-into-8 divide. */ -.globl VG_(helper_idiv_16_8) -VG_(helper_idiv_16_8): - pushl %eax - movw 12(%esp),%ax - idivb 16(%esp) - movb %ah,12(%esp) - movb %al,8(%esp) - popl %eax - ret - - -##--------------------------------------------------------------------## -##--- end vg_helpers.S ---## -##--------------------------------------------------------------------## diff --git a/coregrind/vg_include.h b/coregrind/vg_include.h deleted file mode 100644 index 0d38c92332..0000000000 --- a/coregrind/vg_include.h +++ /dev/null @@ -1,2023 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A header file for all parts of Valgrind. ---*/ -/*--- Include no other! ---*/ -/*--- vg_include.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#ifndef __VG_INCLUDE_H -#define __VG_INCLUDE_H - - -#include <stdarg.h> /* ANSI varargs stuff */ -#include <setjmp.h> /* for jmp_buf */ - - -/* --------------------------------------------------------------------- - Where to send bug reports to. - ------------------------------------------------------------------ */ - -#define VG_EMAIL_ADDR "jseward@acm.org" - - -/* --------------------------------------------------------------------- - Build options and table sizes. You should be able to change these - options or sizes, recompile, and still have a working system. - ------------------------------------------------------------------ */ - -#include "vg_constants.h" - - -/* Set to 1 to enable time profiling. Since this uses SIGPROF, we - don't want this permanently enabled -- only for profiling - builds. */ -#if 0 -# define VG_PROFILE -#endif - - -/* Total number of integer registers available for allocation. That's - all of them except %esp, %edi and %ebp. %edi is a general spare - temporary. %ebp permanently points at VG_(baseBlock). Note that - it's important that this tie in with what rankToRealRegNo() says. - DO NOT CHANGE THIS VALUE FROM 5. ! */ -#define VG_MAX_REALREGS 5 - -/* Total number of spill slots available for allocation, if a TempReg - doesn't make it into a RealReg. Just bomb the entire system if - this value is too small; we don't expect it will ever get - particularly high. */ -#define VG_MAX_SPILLSLOTS 24 - - -/* Constants for the slow translation lookup cache. */ -#define VG_TRANSTAB_SLOW_BITS 11 -#define VG_TRANSTAB_SLOW_SIZE (1 << VG_TRANSTAB_SLOW_BITS) -#define VG_TRANSTAB_SLOW_MASK ((VG_TRANSTAB_SLOW_SIZE) - 1) - -/* Size of a buffer used for creating messages. 
*/ -#define M_VG_MSGBUF 10000 - -/* Size of a smallish table used to read /proc/self/map entries. */ -#define M_PROCMAP_BUF 50000 - -/* Max length of pathname to a .so/executable file. */ -#define M_VG_LIBNAMESTR 100 - -/* Max length of a text fragment used to construct error messages. */ -#define M_VG_ERRTXT 512 - -/* Max length of the string copied from env var VG_ARGS at startup. */ -#define M_VG_CMDLINE_STRLEN 1000 - -/* Max number of options for Valgrind which we can handle. */ -#define M_VG_CMDLINE_OPTS 100 - -/* After this many different unsuppressed errors have been observed, - be more conservative about collecting new ones. */ -#define M_VG_COLLECT_ERRORS_SLOWLY_AFTER 50 - -/* After this many different unsuppressed errors have been observed, - stop collecting errors at all, and tell the user their program is - evidently a steaming pile of camel dung. */ -#define M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN 300 - -/* After this many total errors have been observed, stop collecting - errors at all. Counterpart to M_VG_COLLECT_NO_ERRORS_AFTER_SHOWN. */ -#define M_VG_COLLECT_NO_ERRORS_AFTER_FOUND 30000 - -/* These many bytes below %ESP are considered addressable if we're - doing the --workaround-gcc296-bugs hack. */ -#define VG_GCC296_BUG_STACK_SLOP 1024 - -/* The maximum number of calls we're prepared to save in a - backtrace. */ -#define VG_DEEPEST_BACKTRACE 50 - -/* Number of lists in which we keep track of malloc'd but not free'd - blocks. Should be prime. */ -#define VG_N_MALLOCLISTS 997 - -/* Number of lists in which we keep track of ExeContexts. Should be - prime. */ -#define VG_N_EC_LISTS /*997*/ 4999 - -/* Defines the thread-scheduling timeslice, in terms of the number of - basic blocks we attempt to run each thread for. Smaller values - give finer interleaving but much increased scheduling overheads. */ -#define VG_SCHEDULING_QUANTUM 50000 - -/* The maximum number of pthreads that we support. 
This is - deliberately not very high since our implementation of some of the - scheduler algorithms is surely O(N) in the number of threads, since - that's simple, at least. And (in practice) we hope that most - programs do not need many threads. */ -#define VG_N_THREADS 50 - -/* Maximum number of pthread keys available. Again, we start low until - the need for a higher number presents itself. */ -#define VG_N_THREAD_KEYS 50 - -/* Number of file descriptors that can simultaneously be waited on for - I/O to complete. Perhaps this should be the same as VG_N_THREADS - (surely a thread can't wait on more than one fd at once?. Who - knows.) */ -#define VG_N_WAITING_FDS 10 - -/* Stack size for a thread. We try and check that they do not go - beyond it. */ -#define VG_PTHREAD_STACK_SIZE (1 << 20) - -/* Number of entries in the semaphore-remapping table. */ -#define VG_N_SEMAPHORES 50 - -/* Number of entries in the rwlock-remapping table. */ -#define VG_N_RWLOCKS 50 - -/* Number of entries in each thread's cleanup stack. */ -#define VG_N_CLEANUPSTACK 8 - -/* Number of entries in each thread's fork-handler stack. */ -#define VG_N_FORKHANDLERSTACK 2 - - -/* --------------------------------------------------------------------- - Basic types - ------------------------------------------------------------------ */ - -typedef unsigned char UChar; -typedef unsigned short UShort; -typedef unsigned int UInt; -typedef unsigned long long int ULong; - -typedef signed char Char; -typedef signed short Short; -typedef signed int Int; -typedef signed long long int Long; - -typedef unsigned int Addr; - -typedef unsigned char Bool; -#define False ((Bool)0) -#define True ((Bool)1) - -#define mycat_wrk(aaa,bbb) aaa##bbb -#define mycat(aaa,bbb) mycat_wrk(aaa,bbb) - -/* Just pray that gcc's constant folding works properly ... 
*/ -#define BITS(bit7,bit6,bit5,bit4,bit3,bit2,bit1,bit0) \ - ( ((bit7) << 7) | ((bit6) << 6) | ((bit5) << 5) | ((bit4) << 4) \ - | ((bit3) << 3) | ((bit2) << 2) | ((bit1) << 1) | (bit0)) - -/* For cache simulation */ -typedef struct { - int size; /* bytes */ - int assoc; - int line_size; /* bytes */ -} cache_t; - -#define UNDEFINED_CACHE ((cache_t) { -1, -1, -1 }) - -/* --------------------------------------------------------------------- - Now the basic types are set up, we can haul in the kernel-interface - definitions. - ------------------------------------------------------------------ */ - -#include "./vg_kerneliface.h" - - -/* --------------------------------------------------------------------- - Command-line-settable options - ------------------------------------------------------------------ */ - -#define VG_CLO_SMC_NONE 0 -#define VG_CLO_SMC_SOME 1 -#define VG_CLO_SMC_ALL 2 - -#define VG_CLO_MAX_SFILES 10 - -/* Should we stop collecting errors if too many appear? default: YES */ -extern Bool VG_(clo_error_limit); -/* Shall we V-check addrs (they are always A checked too): default: YES */ -extern Bool VG_(clo_check_addrVs); -/* Enquire about whether to attach to GDB at errors? default: NO */ -extern Bool VG_(clo_GDB_attach); -/* Sanity-check level: 0 = none, 1 (default), > 1 = expensive. */ -extern Int VG_(sanity_level); -/* Verbosity level: 0 = silent, 1 (default), > 1 = more verbose. */ -extern Int VG_(clo_verbosity); -/* Automatically attempt to demangle C++ names? default: YES */ -extern Bool VG_(clo_demangle); -/* Do leak check at exit? default: NO */ -extern Bool VG_(clo_leak_check); -/* In leak check, show reachable-but-not-freed blocks? default: NO */ -extern Bool VG_(clo_show_reachable); -/* How closely should we compare ExeContexts in leak records? default: 2 */ -extern Int VG_(clo_leak_resolution); -/* Round malloc sizes upwards to integral number of words? 
default: - NO */ -extern Bool VG_(clo_sloppy_malloc); -/* Minimum alignment in functions that don't specify alignment explicitly. - default: 0, i.e. use default of the machine (== 4) */ -extern Int VG_(clo_alignment); -/* Allow loads from partially-valid addresses? default: YES */ -extern Bool VG_(clo_partial_loads_ok); -/* Simulate child processes? default: NO */ -extern Bool VG_(clo_trace_children); -/* The file id on which we send all messages. default: 2 (stderr). */ -extern Int VG_(clo_logfile_fd); -/* Max volume of the freed blocks queue. */ -extern Int VG_(clo_freelist_vol); -/* Assume accesses immediately below %esp are due to gcc-2.96 bugs. - default: NO */ -extern Bool VG_(clo_workaround_gcc296_bugs); - -/* The number of suppression files specified. */ -extern Int VG_(clo_n_suppressions); -/* The names of the suppression files. */ -extern Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES]; - -/* Single stepping? default: NO */ -extern Bool VG_(clo_single_step); -/* Code improvement? default: YES */ -extern Bool VG_(clo_optimise); -/* Memory-check instrumentation? default: YES */ -extern Bool VG_(clo_instrument); -/* DEBUG: clean up instrumented code? default: YES */ -extern Bool VG_(clo_cleanup); -/* Cache simulation instrumentation? default: NO */ -extern Bool VG_(clo_cachesim); -/* I1 cache configuration. default: undefined */ -extern cache_t VG_(clo_I1_cache); -/* D1 cache configuration. default: undefined */ -extern cache_t VG_(clo_D1_cache); -/* L2 cache configuration. default: undefined */ -extern cache_t VG_(clo_L2_cache); -/* SMC write checks? default: SOME (1,2,4 byte movs to mem) */ -extern Int VG_(clo_smc_check); -/* DEBUG: print system calls? default: NO */ -extern Bool VG_(clo_trace_syscalls); -/* DEBUG: print signal details? default: NO */ -extern Bool VG_(clo_trace_signals); -/* DEBUG: print symtab details? default: NO */ -extern Bool VG_(clo_trace_symtab); -/* DEBUG: print malloc details? 
default: NO */ -extern Bool VG_(clo_trace_malloc); -/* DEBUG: print thread scheduling events? default: NO */ -extern Bool VG_(clo_trace_sched); -/* DEBUG: print pthread (mutex etc) events? default: 0 (none), 1 - (some), 2 (all) */ -extern Int VG_(clo_trace_pthread_level); -/* Stop after this many basic blocks. default: Infinity. */ -extern ULong VG_(clo_stop_after); -/* Display gory details for the k'th most popular error. default: - Infinity. */ -extern Int VG_(clo_dump_error); -/* Number of parents of a backtrace. Default: 8. */ -extern Int VG_(clo_backtrace_size); -/* Engage miscellaneous weird hacks needed for some progs. */ -extern Char* VG_(clo_weird_hacks); - - -/* --------------------------------------------------------------------- - Debugging and profiling stuff - ------------------------------------------------------------------ */ - -/* No, really. I _am_ that strange. */ -#define OINK(nnn) VG_(message)(Vg_DebugMsg, "OINK %d",nnn) - -/* Tools for building messages from multiple parts. */ -typedef - enum { Vg_UserMsg, Vg_DebugMsg, Vg_DebugExtraMsg } - VgMsgKind; - -extern void VG_(start_msg) ( VgMsgKind kind ); -extern void VG_(add_to_msg) ( Char* format, ... ); -extern void VG_(end_msg) ( void ); - -/* Send a simple, single-part message. */ -extern void VG_(message) ( VgMsgKind kind, Char* format, ... ); - -/* Create a logfile into which messages can be dumped. 
*/ -extern void VG_(startup_logging) ( void ); -extern void VG_(shutdown_logging) ( void ); - - -/* Profiling stuff */ -#ifdef VG_PROFILE - -#define VGP_M_STACK 10 - -#define VGP_M_CCS 26 /* == the # of elems in VGP_LIST */ -#define VGP_LIST \ - VGP_PAIR(VgpUnc=0, "unclassified"), \ - VGP_PAIR(VgpRun, "running"), \ - VGP_PAIR(VgpSched, "scheduler"), \ - VGP_PAIR(VgpMalloc, "low-lev malloc/free"), \ - VGP_PAIR(VgpCliMalloc, "client malloc/free"), \ - VGP_PAIR(VgpTranslate, "translate-main"), \ - VGP_PAIR(VgpToUCode, "to-ucode"), \ - VGP_PAIR(VgpFromUcode, "from-ucode"), \ - VGP_PAIR(VgpImprove, "improve"), \ - VGP_PAIR(VgpInstrument, "instrument"), \ - VGP_PAIR(VgpCleanup, "cleanup"), \ - VGP_PAIR(VgpRegAlloc, "reg-alloc"), \ - VGP_PAIR(VgpDoLRU, "do-lru"), \ - VGP_PAIR(VgpSlowFindT, "slow-search-transtab"), \ - VGP_PAIR(VgpInitAudit, "init-mem-audit"), \ - VGP_PAIR(VgpExeContext, "exe-context"), \ - VGP_PAIR(VgpReadSyms, "read-syms"), \ - VGP_PAIR(VgpAddToT, "add-to-transtab"), \ - VGP_PAIR(VgpSARP, "set-addr-range-perms"), \ - VGP_PAIR(VgpSyscall, "syscall wrapper"), \ - VGP_PAIR(VgpCacheInstrument, "cache instrument"), \ - VGP_PAIR(VgpCacheGetBBCC,"cache get BBCC"), \ - VGP_PAIR(VgpCacheSimulate, "cache simulate"), \ - VGP_PAIR(VgpCacheDump, "cache stats dump"), \ - VGP_PAIR(VgpSpare1, "spare 1"), \ - VGP_PAIR(VgpSpare2, "spare 2") - -#define VGP_PAIR(enumname,str) enumname -typedef enum { VGP_LIST } VgpCC; -#undef VGP_PAIR - -extern void VGP_(init_profiling) ( void ); -extern void VGP_(done_profiling) ( void ); -extern void VGP_(pushcc) ( VgpCC ); -extern void VGP_(popcc) ( void ); - -#define VGP_PUSHCC(cc) VGP_(pushcc)(cc) -#define VGP_POPCC VGP_(popcc)() - -#else - -#define VGP_PUSHCC(cc) /* */ -#define VGP_POPCC /* */ - -#endif /* VG_PROFILE */ - - -/* --------------------------------------------------------------------- - Exports of vg_malloc2.c - ------------------------------------------------------------------ */ - -/* Allocation arenas. 
- SYMTAB is for Valgrind's symbol table storage. - CLIENT is for the client's mallocs/frees. - DEMANGLE is for the C++ demangler. - EXECTXT is for storing ExeContexts. - ERRCTXT is for storing ErrContexts. - PRIVATE is for Valgrind general stuff. - TRANSIENT is for very short-term use. It should be empty - in between uses. - When adding a new arena, remember also to add it - to ensure_mm_init(). -*/ -typedef Int ArenaId; - -#define VG_N_ARENAS 7 - -#define VG_AR_PRIVATE 0 /* :: ArenaId */ -#define VG_AR_SYMTAB 1 /* :: ArenaId */ -#define VG_AR_CLIENT 2 /* :: ArenaId */ -#define VG_AR_DEMANGLE 3 /* :: ArenaId */ -#define VG_AR_EXECTXT 4 /* :: ArenaId */ -#define VG_AR_ERRCTXT 5 /* :: ArenaId */ -#define VG_AR_TRANSIENT 6 /* :: ArenaId */ - -extern void* VG_(malloc) ( ArenaId arena, Int nbytes ); -extern void VG_(free) ( ArenaId arena, void* ptr ); -extern void* VG_(calloc) ( ArenaId arena, Int nmemb, Int nbytes ); -extern void* VG_(realloc) ( ArenaId arena, void* ptr, Int size ); -extern void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, - Int req_pszB ); - -extern void VG_(mallocSanityCheckArena) ( ArenaId arena ); -extern void VG_(mallocSanityCheckAll) ( void ); - -extern void VG_(show_all_arena_stats) ( void ); -extern Bool VG_(is_empty_arena) ( ArenaId aid ); - - -/* The red-zone size for the client. This can be arbitrary, but - unfortunately must be set at compile time. */ -#define VG_AR_CLIENT_REDZONE_SZW 4 - -#define VG_AR_CLIENT_REDZONE_SZB \ - (VG_AR_CLIENT_REDZONE_SZW * VKI_BYTES_PER_WORD) - - -/* --------------------------------------------------------------------- - Exports of vg_clientfuncs.c - ------------------------------------------------------------------ */ - -/* This doesn't export code or data that valgrind.so needs to link - against. However, the scheduler does need to know the following - request codes. A few, publicly-visible, request codes are also - defined in valgrind.h. 
*/ - -#define VG_USERREQ__MALLOC 0x2001 -#define VG_USERREQ__BUILTIN_NEW 0x2002 -#define VG_USERREQ__BUILTIN_VEC_NEW 0x2003 - -#define VG_USERREQ__FREE 0x2004 -#define VG_USERREQ__BUILTIN_DELETE 0x2005 -#define VG_USERREQ__BUILTIN_VEC_DELETE 0x2006 - -#define VG_USERREQ__CALLOC 0x2007 -#define VG_USERREQ__REALLOC 0x2008 -#define VG_USERREQ__MEMALIGN 0x2009 - - -/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it. Fn - MUST NOT return -- ever. Eventually it will do either __QUIT or - __WAIT_JOINER. */ -#define VG_USERREQ__APPLY_IN_NEW_THREAD 0x3001 - -/* ( no-args ): calling thread disappears from the system forever. - Reclaim resources. */ -#define VG_USERREQ__QUIT 0x3002 - -/* ( void* ): calling thread waits for joiner and returns the void* to - it. */ -#define VG_USERREQ__WAIT_JOINER 0x3003 - -/* ( ThreadId, void** ): wait to join a thread. */ -#define VG_USERREQ__PTHREAD_JOIN 0x3004 - -/* Set cancellation state and type for this thread. */ -#define VG_USERREQ__SET_CANCELSTATE 0x3005 -#define VG_USERREQ__SET_CANCELTYPE 0x3006 - -/* ( no-args ): Test if we are at a cancellation point. */ -#define VG_USERREQ__TESTCANCEL 0x3007 - -/* ( ThreadId, &thread_exit_wrapper is the only allowable arg ): call - with this arg to indicate that a cancel is now pending for the - specified thread. */ -#define VG_USERREQ__SET_CANCELPEND 0x3008 - -/* Set/get detach state for this thread. 
*/ -#define VG_USERREQ__SET_OR_GET_DETACH 0x3009 - -#define VG_USERREQ__PTHREAD_GET_THREADID 0x300B -#define VG_USERREQ__PTHREAD_MUTEX_LOCK 0x300C -#define VG_USERREQ__PTHREAD_MUTEX_TRYLOCK 0x300D -#define VG_USERREQ__PTHREAD_MUTEX_UNLOCK 0x300E -#define VG_USERREQ__PTHREAD_COND_WAIT 0x300F -#define VG_USERREQ__PTHREAD_COND_TIMEDWAIT 0x3010 -#define VG_USERREQ__PTHREAD_COND_SIGNAL 0x3011 -#define VG_USERREQ__PTHREAD_COND_BROADCAST 0x3012 -#define VG_USERREQ__PTHREAD_KEY_CREATE 0x3013 -#define VG_USERREQ__PTHREAD_KEY_DELETE 0x3014 -#define VG_USERREQ__PTHREAD_SETSPECIFIC 0x3015 -#define VG_USERREQ__PTHREAD_GETSPECIFIC 0x3016 -#define VG_USERREQ__READ_MILLISECOND_TIMER 0x3017 -#define VG_USERREQ__PTHREAD_SIGMASK 0x3018 -#define VG_USERREQ__SIGWAIT 0x3019 -#define VG_USERREQ__PTHREAD_KILL 0x301A -#define VG_USERREQ__PTHREAD_YIELD 0x301B - -#define VG_USERREQ__CLEANUP_PUSH 0x3020 -#define VG_USERREQ__CLEANUP_POP 0x3021 -#define VG_USERREQ__GET_KEY_D_AND_S 0x3022 - -#define VG_USERREQ__NUKE_OTHER_THREADS 0x3023 - -/* Ask how many signal handler returns have happened to this - thread. */ -#define VG_USERREQ__GET_N_SIGS_RETURNED 0x3024 - -/* Get/set entries for a thread's pthread_atfork stack. */ -#define VG_USERREQ__SET_FHSTACK_USED 0x3025 -#define VG_USERREQ__GET_FHSTACK_USED 0x3026 -#define VG_USERREQ__SET_FHSTACK_ENTRY 0x3027 -#define VG_USERREQ__GET_FHSTACK_ENTRY 0x3028 - -/* Denote the finish of VG_(__libc_freeres_wrapper). */ -#define VG_USERREQ__LIBC_FREERES_DONE 0x3029 - -/* Cosmetic ... */ -#define VG_USERREQ__GET_PTHREAD_TRACE_LEVEL 0x3101 -/* Log a pthread error from client-space. Cosmetic. */ -#define VG_USERREQ__PTHREAD_ERROR 0x3102 - -/* -In vg_constants.h: -#define VG_USERREQ__SIGNAL_RETURNS 0x4001 -*/ - -/* The scheduler does need to know the address of it so it can be - called at program exit. 
*/ -extern void VG_(__libc_freeres_wrapper)( void ); - - -/* --------------------------------------------------------------------- - Constants pertaining to the simulated CPU state, VG_(baseBlock), - which need to go here to avoid ugly circularities. - ------------------------------------------------------------------ */ - -/* How big is the saved FPU state? */ -#define VG_SIZE_OF_FPUSTATE 108 -/* ... and in words ... */ -#define VG_SIZE_OF_FPUSTATE_W ((VG_SIZE_OF_FPUSTATE+3)/4) - - -/* --------------------------------------------------------------------- - Exports of vg_scheduler.c - ------------------------------------------------------------------ */ - -/* ThreadIds are simply indices into the vg_threads[] array. */ -typedef - UInt - ThreadId; - -/* Special magic value for an invalid ThreadId. It corresponds to - LinuxThreads using zero as the initial value for - pthread_mutex_t.__m_owner and pthread_cond_t.__c_waiting. */ -#define VG_INVALID_THREADID ((ThreadId)(0)) - -typedef - enum { - VgTs_Empty, /* this slot is not in use */ - VgTs_Runnable, /* waiting to be scheduled */ - VgTs_WaitJoiner, /* waiting for someone to do join on me */ - VgTs_WaitJoinee, /* waiting for the thread I did join on */ - VgTs_WaitFD, /* waiting for I/O completion on a fd */ - VgTs_WaitMX, /* waiting on a mutex */ - VgTs_WaitCV, /* waiting on a condition variable */ - VgTs_WaitSIG, /* waiting due to sigwait() */ - VgTs_Sleeping /* sleeping for a while */ - } - ThreadStatus; - -/* An entry in a thread's cleanup stack. */ -typedef - struct { - void (*fn)(void*); - void* arg; - } - CleanupEntry; - -/* An entry in a thread's fork-handler stack. */ -typedef - struct { - void (*prepare)(void); - void (*parent)(void); - void (*child)(void); - } - ForkHandlerEntry; - - -typedef - struct { - /* ThreadId == 0 (and hence vg_threads[0]) is NEVER USED. - The thread identity is simply the index in vg_threads[]. 
- ThreadId == 1 is the root thread and has the special property - that we don't try and allocate or deallocate its stack. For - convenience of generating error message, we also put the - ThreadId in this tid field, but be aware that it should - ALWAYS == the index in vg_threads[]. */ - ThreadId tid; - - /* Current scheduling status. - - Complications: whenever this is set to VgTs_WaitMX, you - should also set .m_edx to whatever the required return value - is for pthread_mutex_lock / pthread_cond_timedwait for when - the mutex finally gets unblocked. */ - ThreadStatus status; - - /* When .status == WaitMX, points to the mutex I am waiting for. - When .status == WaitCV, points to the mutex associated with - the condition variable indicated by the .associated_cv field. - In all other cases, should be NULL. */ - void* /* pthread_mutex_t* */ associated_mx; - - /* When .status == WaitCV, points to the condition variable I am - waiting for. In all other cases, should be NULL. */ - void* /* pthread_cond_t* */ associated_cv; - - /* If VgTs_Sleeping, this is when we should wake up, measured in - milliseconds as supplied by VG_(read_millisecond_counter). - - If VgTs_WaitCV, this indicates the time at which - pthread_cond_timedwait should wake up. If == 0xFFFFFFFF, - this means infinitely far in the future, viz, - pthread_cond_wait. */ - UInt awaken_at; - - /* If VgTs_WaitJoiner, return value, as generated by joinees. */ - void* joinee_retval; - - /* If VgTs_WaitJoinee, place to copy the return value to, and - the identity of the thread we're waiting for. */ - void** joiner_thread_return; - ThreadId joiner_jee_tid; - - /* Whether or not detached. */ - Bool detached; - - /* Cancelability state and type. */ - Bool cancel_st; /* False==PTH_CANCEL_DISABLE; True==.._ENABLE */ - Bool cancel_ty; /* False==PTH_CANC_ASYNCH; True==..._DEFERRED */ - - /* Pointer to fn to call to do cancellation. Indicates whether - or not cancellation is pending. If NULL, not pending. 
Else - should be &thread_exit_wrapper(), indicating that - cancellation is pending. */ - void (*cancel_pend)(void*); - - /* The cleanup stack. */ - Int custack_used; - CleanupEntry custack[VG_N_CLEANUPSTACK]; - - /* thread-specific data */ - void* specifics[VG_N_THREAD_KEYS]; - - /* This thread's blocked-signals mask. Semantics is that for a - signal to be delivered to this thread, the signal must not be - blocked by either the process-wide signal mask nor by this - one. So, if this thread is prepared to handle any signal that - the process as a whole is prepared to handle, this mask should - be made empty -- and that it is its default, starting - state. */ - vki_ksigset_t sig_mask; - - /* When not VgTs_WaitSIG, has no meaning. When VgTs_WaitSIG, - is the set of signals for which we are sigwait()ing. */ - vki_ksigset_t sigs_waited_for; - - /* Counts the number of times a signal handler for this thread - has returned. This makes it easy to implement pause(), by - polling this value, of course interspersed with nanosleeps, - and waiting till it changes. */ - UInt n_signals_returned; - - /* Stacks. When a thread slot is freed, we don't deallocate its - stack; we just leave it lying around for the next use of the - slot. If the next use of the slot requires a larger stack, - only then is the old one deallocated and a new one - allocated. - - For the main thread (threadid == 0), this mechanism doesn't - apply. We don't know the size of the stack since we didn't - allocate it, and furthermore we never reallocate it. */ - - /* The allocated size of this thread's stack (permanently zero - if this is ThreadId == 0, since we didn't allocate its stack) */ - UInt stack_size; - - /* Address of the lowest word in this thread's stack. NULL means - not allocated yet. - */ - Addr stack_base; - - /* Address of the highest legitimate word in this stack. This is - used for error messages only -- not critical for execution - correctness. 
It is set for all stacks, specifically including - ThreadId == 0 (the main thread). */ - Addr stack_highest_word; - - /* Saved machine context. */ - UInt m_eax; - UInt m_ebx; - UInt m_ecx; - UInt m_edx; - UInt m_esi; - UInt m_edi; - UInt m_ebp; - UInt m_esp; - UInt m_eflags; - UInt m_eip; - UInt m_fpu[VG_SIZE_OF_FPUSTATE_W]; - - UInt sh_eax; - UInt sh_ebx; - UInt sh_ecx; - UInt sh_edx; - UInt sh_esi; - UInt sh_edi; - UInt sh_ebp; - UInt sh_esp; - UInt sh_eflags; - } - ThreadState; - - -/* The thread table. */ -extern ThreadState VG_(threads)[VG_N_THREADS]; - -/* Check that tid is in range and denotes a non-Empty thread. */ -extern Bool VG_(is_valid_tid) ( ThreadId tid ); - -/* Check that tid is in range. */ -extern Bool VG_(is_valid_or_empty_tid) ( ThreadId tid ); - -/* Copy the specified thread's state into VG_(baseBlock) in - preparation for running it. */ -extern void VG_(load_thread_state)( ThreadId ); - -/* Save the specified thread's state back in VG_(baseBlock), and fill - VG_(baseBlock) with junk, for sanity-check reasons. */ -extern void VG_(save_thread_state)( ThreadId ); - -/* And for the currently running one, if valid. */ -extern ThreadState* VG_(get_current_thread_state) ( void ); - -/* Similarly ... */ -extern ThreadId VG_(get_current_tid) ( void ); - -/* Which thread is this address in the stack of, if any? Used for - error message generation. */ -extern ThreadId VG_(identify_stack_addr)( Addr a ); - -/* Nuke all threads except tid. */ -extern void VG_(nuke_all_threads_except) ( ThreadId me ); - - -/* Return codes from the scheduler. */ -typedef - enum { - VgSrc_Deadlock, /* no runnable threads and no prospect of any - even if we wait for a long time */ - VgSrc_ExitSyscall, /* client called exit(). This is the normal - route out. */ - VgSrc_BbsDone /* In a debugging run, the specified number of - bbs has been completed. */ - } - VgSchedReturnCode; - - -/* The scheduler. 
*/ -extern VgSchedReturnCode VG_(scheduler) ( void ); - -extern void VG_(scheduler_init) ( void ); - -extern void VG_(pp_sched_status) ( void ); - -/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ -extern jmp_buf VG_(scheduler_jmpbuf); -/* This says whether scheduler_jmpbuf is actually valid. Needed so - that our signal handler doesn't longjmp when the buffer isn't - actually valid. */ -extern Bool VG_(scheduler_jmpbuf_valid); -/* ... and if so, here's the signal which caused it to do so. */ -extern Int VG_(longjmpd_on_signal); - - -/* Possible places where the main stack might be based. We check that - the initial stack, which we can't move, is allocated here. - VG_(scheduler_init) checks this. Andrea Arcangeli's 2.4 kernels - have been rumoured to start stacks at 0x80000000, so that too is - considered. It seems systems with longer uptimes tend to use - stacks which start at 0x40000000 sometimes. -*/ -#define VG_STARTUP_STACK_BASE_1 (Addr)0xC0000000 -#define VG_STARTUP_STACK_BASE_2 (Addr)0x80000000 -#define VG_STARTUP_STACK_BASE_3 (Addr)0x40000000 -#define VG_STARTUP_STACK_SMALLERTHAN 0x100000 /* 1024k */ - -#define VG_STACK_MATCHES_BASE(zzstack, zzbase) \ - ( \ - ((zzstack) & ((zzbase) - VG_STARTUP_STACK_SMALLERTHAN)) \ - == \ - ((zzbase) - VG_STARTUP_STACK_SMALLERTHAN) \ - ) - - -/* The red-zone size which we put at the bottom (highest address) of - thread stacks, for paranoia reasons. This can be arbitrary, and - doesn't really need to be set at compile time. */ -#define VG_AR_CLIENT_STACKBASE_REDZONE_SZW 4 - -#define VG_AR_CLIENT_STACKBASE_REDZONE_SZB \ - (VG_AR_CLIENT_STACKBASE_REDZONE_SZW * VKI_BYTES_PER_WORD) - - -/* Write a value to the client's %EDX (request return value register) - and set the shadow to indicate it is defined. 
*/ -#define SET_EDX(zztid, zzval) \ - do { VG_(threads)[zztid].m_edx = (zzval); \ - VG_(threads)[zztid].sh_edx = VGM_WORD_VALID; \ - } while (0) - -#define SET_EAX(zztid, zzval) \ - do { VG_(threads)[zztid].m_eax = (zzval); \ - VG_(threads)[zztid].sh_eax = VGM_WORD_VALID; \ - } while (0) - - -/* --------------------------------------------------------------------- - Exports of vg_signals.c - ------------------------------------------------------------------ */ - -extern void VG_(sigstartup_actions) ( void ); - -extern Bool VG_(deliver_signals) ( void ); -extern void VG_(unblock_host_signal) ( Int sigNo ); -extern void VG_(handle_SCSS_change) ( Bool force_update ); - - -/* Fake system calls for signal handling. */ -extern void VG_(do__NR_sigaltstack) ( ThreadId tid ); -extern void VG_(do__NR_sigaction) ( ThreadId tid ); -extern void VG_(do__NR_sigprocmask) ( ThreadId tid, - Int how, - vki_ksigset_t* set, - vki_ksigset_t* oldset ); -extern void VG_(do_pthread_sigmask_SCSS_upd) ( ThreadId tid, - Int how, - vki_ksigset_t* set, - vki_ksigset_t* oldset ); -extern void VG_(send_signal_to_thread) ( ThreadId thread, - Int signo ); - -extern void VG_(do_sigpending) ( ThreadId tid, vki_ksigset_t* set ); - - -/* Modify the current thread's state once we have detected it is - returning from a signal handler. */ -extern Bool VG_(signal_returns) ( ThreadId ); - -/* Handy utilities to block/restore all host signals. */ -extern void VG_(block_all_host_signals) - ( /* OUT */ vki_ksigset_t* saved_mask ); -extern void VG_(restore_all_host_signals) - ( /* IN */ vki_ksigset_t* saved_mask ); - -/* --------------------------------------------------------------------- - Exports of vg_mylibc.c - ------------------------------------------------------------------ */ - - -#if !defined(NULL) -# define NULL ((void*)0) -#endif - -extern void VG_(exit)( Int status ) - __attribute__ ((__noreturn__)); - -extern void VG_(printf) ( const char *format, ... ); -/* too noisy ... 
__attribute__ ((format (printf, 1, 2))) ; */ - -extern void VG_(sprintf) ( Char* buf, Char *format, ... ); - -extern void VG_(vprintf) ( void(*send)(Char), - const Char *format, va_list vargs ); - -extern Bool VG_(isspace) ( Char c ); -extern Bool VG_(isdigit) ( Char c ); - -extern Int VG_(strlen) ( const Char* str ); - -extern Long VG_(atoll) ( Char* str ); -extern Long VG_(atoll36) ( Char* str ); - -extern Char* VG_(strcat) ( Char* dest, const Char* src ); -extern Char* VG_(strncat) ( Char* dest, const Char* src, Int n ); -extern Char* VG_(strpbrk) ( const Char* s, const Char* accept ); - -extern Char* VG_(strcpy) ( Char* dest, const Char* src ); - -extern Int VG_(strcmp) ( const Char* s1, const Char* s2 ); -extern Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ); - -extern Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ); -extern Int VG_(strncmp_ws) ( const Char* s1, const Char* s2, Int nmax ); - -extern Char* VG_(strstr) ( const Char* haystack, Char* needle ); -extern Char* VG_(strchr) ( const Char* s, Char c ); -extern Char* VG_(strdup) ( ArenaId aid, const Char* s); - -extern Char* VG_(getenv) ( Char* name ); -extern Int VG_(getpid) ( void ); - -extern void VG_(start_rdtsc_calibration) ( void ); -extern void VG_(end_rdtsc_calibration) ( void ); -extern UInt VG_(read_millisecond_timer) ( void ); - - -extern Char VG_(toupper) ( Char c ); - -extern void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ); - -extern void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ); - -extern Bool VG_(stringMatch) ( Char* pat, Char* str ); - - -#define VG__STRING(__str) #__str - -/* Asserts are permanently enabled. Hurrah! */ -#define vg_assert(expr) \ - ((void) ((expr) ? 0 : \ - (VG_(assert_fail) (VG__STRING(expr), \ - __FILE__, __LINE__, \ - __PRETTY_FUNCTION__), 0))) - -extern void VG_(assert_fail) ( Char* expr, Char* file, - Int line, Char* fn ) - __attribute__ ((__noreturn__)); - -/* Reading and writing files. 
*/ -extern Int VG_(open_read) ( Char* pathname ); -extern Int VG_(open_write) ( Char* pathname ); -extern Int VG_(create_and_write) ( Char* pathname ); -extern void VG_(close) ( Int fd ); -extern Int VG_(read) ( Int fd, void* buf, Int count); -extern Int VG_(write) ( Int fd, void* buf, Int count); -extern Int VG_(stat) ( Char* file_name, struct vki_stat* buf ); - -extern Int VG_(fcntl) ( Int fd, Int cmd, Int arg ); - -extern Int VG_(select)( Int n, - vki_fd_set* readfds, - vki_fd_set* writefds, - vki_fd_set* exceptfds, - struct vki_timeval * timeout ); -extern Int VG_(nanosleep)( const struct vki_timespec *req, - struct vki_timespec *rem ); - - -/* mmap-ery ... */ -extern void* VG_(mmap)( void* start, UInt length, - UInt prot, UInt flags, UInt fd, UInt offset ); - -extern Int VG_(munmap)( void* start, Int length ); - -extern void* VG_(brk) ( void* end_data_segment ); - - -/* Print a (panic) message, and abort. */ -extern void VG_(panic) ( Char* str ) - __attribute__ ((__noreturn__)); - -/* Get memory by anonymous mmap. */ -extern void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who ); - -/* Crude stand-in for the glibc system() call. */ -extern Int VG_(system) ( Char* cmd ); - - -/* Signal stuff. Note that these use the vk_ (kernel) structure - definitions, which are different in places from those that glibc - defines. Since we're operating right at the kernel interface, - glibc's view of the world is entirely irrelevant. 
*/ - -/* --- Signal set ops --- */ -extern Int VG_(ksigfillset)( vki_ksigset_t* set ); -extern Int VG_(ksigemptyset)( vki_ksigset_t* set ); - -extern Bool VG_(kisfullsigset)( vki_ksigset_t* set ); -extern Bool VG_(kisemptysigset)( vki_ksigset_t* set ); - -extern Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ); -extern Int VG_(ksigdelset)( vki_ksigset_t* set, Int signum ); -extern Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ); - -extern void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, - vki_ksigset_t* src ); -extern void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, - vki_ksigset_t* src ); - -/* --- Mess with the kernel's sig state --- */ -extern Int VG_(ksigprocmask)( Int how, const vki_ksigset_t* set, - vki_ksigset_t* oldset ); -extern Int VG_(ksigaction) ( Int signum, - const vki_ksigaction* act, - vki_ksigaction* oldact ); - -extern Int VG_(ksignal)(Int signum, void (*sighandler)(Int)); - -extern Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ); - -extern Int VG_(kill)( Int pid, Int signo ); -extern Int VG_(sigpending) ( vki_ksigset_t* set ); - - -/* --------------------------------------------------------------------- - Definitions for the JITter (vg_translate.c, vg_to_ucode.c, - vg_from_ucode.c). - ------------------------------------------------------------------ */ - -/* Tags which describe what operands are. */ -typedef - enum { TempReg=0, ArchReg=1, RealReg=2, - SpillNo=3, Literal=4, Lit16=5, - NoValue=6 } - Tag; - - -/* Microinstruction opcodes. */ -typedef - enum { - NOP, - GET, - PUT, - LOAD, - STORE, - MOV, - CMOV, /* Used for cmpxchg and cmov */ - WIDEN, - JMP, - - /* Read/write the %EFLAGS register into a TempReg. */ - GETF, PUTF, - - ADD, ADC, AND, OR, XOR, SUB, SBB, - SHL, SHR, SAR, ROL, ROR, RCL, RCR, - NOT, NEG, INC, DEC, BSWAP, - CC2VAL, - - /* Not strictly needed, but useful for making better - translations of address calculations. 
*/ - LEA1, /* reg2 := const + reg1 */ - LEA2, /* reg3 := const + reg1 + reg2 * 1,2,4 or 8 */ - - /* not for translating x86 calls -- only to call helpers */ - CALLM_S, CALLM_E, /* Mark start and end of push/pop sequences - for CALLM. */ - PUSH, POP, CLEAR, /* Add/remove/zap args for helpers. */ - CALLM, /* call to a machine-code helper */ - - /* Hack for translating string (REP-) insns. Jump to literal if - TempReg/RealReg is zero. */ - JIFZ, - - /* FPU ops which read/write mem or don't touch mem at all. */ - FPU_R, - FPU_W, - FPU, - - /* Advance the simulated %eip by some small (< 128) number. */ - INCEIP, - - /* uinstrs which are not needed for mere translation of x86 code, - only for instrumentation of it. */ - LOADV, - STOREV, - GETV, - PUTV, - TESTV, - SETV, - /* Get/set the v-bit (and it is only one bit) for the simulated - %eflags register. */ - GETVF, - PUTVF, - - /* Do a unary or binary tag op. Only for post-instrumented - code. For TAG1, first and only arg is a TempReg, and is both - arg and result reg. For TAG2, first arg is src, second is - dst, in the normal way; both are TempRegs. In both cases, - 3rd arg is a RiCHelper with a Lit16 tag. This indicates - which tag op to do. */ - TAG1, - TAG2 - } - Opcode; - - -/* Condition codes, observing the Intel encoding. CondAlways is an - extra. */ -typedef - enum { - CondO = 0, /* overflow */ - CondNO = 1, /* no overflow */ - CondB = 2, /* below */ - CondNB = 3, /* not below */ - CondZ = 4, /* zero */ - CondNZ = 5, /* not zero */ - CondBE = 6, /* below or equal */ - CondNBE = 7, /* not below or equal */ - CondS = 8, /* negative */ - ConsNS = 9, /* not negative */ - CondP = 10, /* parity even */ - CondNP = 11, /* not parity even */ - CondL = 12, /* jump less */ - CondNL = 13, /* not less */ - CondLE = 14, /* less or equal */ - CondNLE = 15, /* not less or equal */ - CondAlways = 16 /* Jump always */ - } - Condcode; - - -/* Descriptions of additional properties of *unconditional* jumps. 
*/ -typedef - enum { - JmpBoring=0, /* boring unconditional jump */ - JmpCall=1, /* jump due to an x86 call insn */ - JmpRet=2, /* jump due to an x86 ret insn */ - JmpSyscall=3, /* do a system call, then jump */ - JmpClientReq=4 /* do a client request, then jump */ - } - JmpKind; - - -/* Flags. User-level code can only read/write O(verflow), S(ign), - Z(ero), A(ux-carry), C(arry), P(arity), and may also write - D(irection). That's a total of 7 flags. A FlagSet is a bitset, - thusly: - 76543210 - DOSZACP - and bit 7 must always be zero since it is unused. -*/ -typedef UChar FlagSet; - -#define FlagD (1<<6) -#define FlagO (1<<5) -#define FlagS (1<<4) -#define FlagZ (1<<3) -#define FlagA (1<<2) -#define FlagC (1<<1) -#define FlagP (1<<0) - -#define FlagsOSZACP (FlagO | FlagS | FlagZ | FlagA | FlagC | FlagP) -#define FlagsOSZAP (FlagO | FlagS | FlagZ | FlagA | FlagP) -#define FlagsOSZCP (FlagO | FlagS | FlagZ | FlagC | FlagP) -#define FlagsOSACP (FlagO | FlagS | FlagA | FlagC | FlagP) -#define FlagsSZACP ( FlagS | FlagZ | FlagA | FlagC | FlagP) -#define FlagsSZAP ( FlagS | FlagZ | FlagA | FlagP) -#define FlagsZCP ( FlagZ | FlagC | FlagP) -#define FlagsOC (FlagO | FlagC ) -#define FlagsAC ( FlagA | FlagC ) - -#define FlagsALL (FlagsOSZACP | FlagD) -#define FlagsEmpty (FlagSet)0 - -#define VG_IS_FLAG_SUBSET(set1,set2) \ - (( ((FlagSet)set1) & ((FlagSet)set2) ) == ((FlagSet)set1) ) - -#define VG_UNION_FLAG_SETS(set1,set2) \ - ( ((FlagSet)set1) | ((FlagSet)set2) ) - - - -/* A Micro (u)-instruction. 
*/ -typedef - struct { - /* word 1 */ - UInt lit32; /* 32-bit literal */ - - /* word 2 */ - UShort val1; /* first operand */ - UShort val2; /* second operand */ - - /* word 3 */ - UShort val3; /* third operand */ - UChar opcode; /* opcode */ - UChar size; /* data transfer size */ - - /* word 4 */ - FlagSet flags_r; /* :: FlagSet */ - FlagSet flags_w; /* :: FlagSet */ - UChar tag1:4; /* first operand tag */ - UChar tag2:4; /* second operand tag */ - UChar tag3:4; /* third operand tag */ - UChar extra4b:4; /* Spare field, used by WIDEN for src - -size, and by LEA2 for scale - (1,2,4 or 8), and by unconditional JMPs for - orig x86 instr size if --cachesim=yes */ - - - /* word 5 */ - UChar cond; /* condition, for jumps */ - Bool smc_check:1; /* do a smc test, if writes memory. */ - Bool signed_widen:1; /* signed or unsigned WIDEN ? */ - JmpKind jmpkind:3; /* additional properties of unconditional JMP */ - } - UInstr; - - -/* Expandable arrays of uinstrs. */ -typedef - struct { - Int used; - Int size; - UInstr* instrs; - Int nextTemp; - } - UCodeBlock; - -/* Refer to `the last instruction stuffed in', including as an - lvalue. 
*/ -#define LAST_UINSTR(cb) (cb)->instrs[(cb)->used-1] - -/* An invalid temporary number :-) */ -#define INVALID_TEMPREG 999999999 - - -/* --------------------------------------------------------------------- - Exports of vg_demangle.c - ------------------------------------------------------------------ */ - -extern void VG_(demangle) ( Char* orig, Char* result, Int result_size ); - - -/* --------------------------------------------------------------------- - Exports of vg_from_ucode.c - ------------------------------------------------------------------ */ - -extern UChar* VG_(emit_code) ( UCodeBlock* cb, Int* nbytes ); - - -/* --------------------------------------------------------------------- - Exports of vg_to_ucode.c - ------------------------------------------------------------------ */ - -extern Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 ); -extern Char* VG_(nameOfIntReg) ( Int size, Int reg ); -extern Char VG_(nameOfIntSize) ( Int size ); -extern UInt VG_(extend_s_8to32) ( UInt x ); -extern Int VG_(getNewTemp) ( UCodeBlock* cb ); -extern Int VG_(getNewShadow) ( UCodeBlock* cb ); - -#define SHADOW(tempreg) ((tempreg)+1) - - -/* --------------------------------------------------------------------- - Exports of vg_translate.c - ------------------------------------------------------------------ */ - -extern void VG_(translate) ( ThreadState* tst, - Addr orig_addr, - UInt* orig_size, - Addr* trans_addr, - UInt* trans_size ); - -extern void VG_(emptyUInstr) ( UInstr* u ); -extern void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz ); -extern void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1 ); -extern void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1, - Tag tag2, UInt val2 ); -extern void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1, - Tag tag2, UInt val2, - Tag tag3, UInt val3 ); -extern void VG_(setFlagRW) ( UInstr* u, - FlagSet fr, FlagSet fw ); - 
-extern void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 ); -extern Bool VG_(anyFlagUse) ( UInstr* u ); - - - -extern void VG_(ppUInstr) ( Int instrNo, UInstr* u ); -extern void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title ); - -extern UCodeBlock* VG_(allocCodeBlock) ( void ); -extern void VG_(freeCodeBlock) ( UCodeBlock* cb ); -extern void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr ); - -extern Char* VG_(nameCondcode) ( Condcode cond ); -extern Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u ); -extern Bool VG_(saneUCodeBlock) ( UCodeBlock* cb ); -extern Char* VG_(nameUOpcode) ( Bool upper, Opcode opc ); -extern Int VG_(rankToRealRegNo) ( Int rank ); - -extern void* VG_(jitmalloc) ( Int nbytes ); -extern void VG_(jitfree) ( void* ptr ); - - -/* --------------------------------------------------------------------- - Exports of vg_execontext.c. - ------------------------------------------------------------------ */ - -/* Records the PC and a bit of the call chain. The first 4 %eip - values are used in comparisons do remove duplicate errors, and for - comparing against suppression specifications. The rest are purely - informational (but often important). */ - -typedef - struct _ExeContextRec { - struct _ExeContextRec * next; - /* The size of this array is VG_(clo_backtrace_size); at least - 2, at most VG_DEEPEST_BACKTRACE. [0] is the current %eip, - [1] is its caller, [2] is the caller of [1], etc. */ - Addr eips[0]; - } - ExeContext; - - -/* Initialise the ExeContext storage mechanism. */ -extern void VG_(init_ExeContext_storage) ( void ); - -/* Print stats (informational only). */ -extern void VG_(show_ExeContext_stats) ( void ); - - -/* Take a snapshot of the client's stack. Search our collection of - ExeContexts to see if we already have it, and if not, allocate a - new one. Either way, return a pointer to the context. */ -extern ExeContext* VG_(get_ExeContext) ( Bool skip_top_frame, - Addr eip, Addr ebp ); - -/* Print an ExeContext. 
*/ -extern void VG_(pp_ExeContext) ( ExeContext* ); - -/* Compare two ExeContexts, just comparing the top two callers. */ -extern Bool VG_(eq_ExeContext_top2) ( ExeContext* e1, ExeContext* e2 ); - -/* Compare two ExeContexts, just comparing the top four callers. */ -extern Bool VG_(eq_ExeContext_top4) ( ExeContext* e1, ExeContext* e2 ); - -/* Compare two ExeContexts, comparing all callers. */ -extern Bool VG_(eq_ExeContext_all) ( ExeContext* e1, ExeContext* e2 ); - - - -/* --------------------------------------------------------------------- - Exports of vg_errcontext.c. - ------------------------------------------------------------------ */ - -extern void VG_(load_suppressions) ( void ); -extern void VG_(show_all_errors) ( void ); -extern void VG_(record_value_error) ( Int size ); -extern void VG_(record_free_error) ( ThreadState* tst, Addr a ); -extern void VG_(record_freemismatch_error) ( ThreadState* tst, Addr a ); -extern void VG_(record_address_error) ( Addr a, Int size, - Bool isWrite ); - -extern void VG_(record_jump_error) ( ThreadState* tst, Addr a ); - -extern void VG_(record_param_err) ( ThreadState* tst, - Addr a, - Bool isWriteLack, - Char* msg ); -extern void VG_(record_user_err) ( ThreadState* tst, - Addr a, Bool isWriteLack ); -extern void VG_(record_pthread_err) ( ThreadId tid, Char* msg ); - - - -/* The classification of a faulting address. */ -typedef - enum { Undescribed, /* as-yet unclassified */ - Stack, - Unknown, /* classification yielded nothing useful */ - Freed, Mallocd, - UserG, UserS } - AddrKind; - -/* Records info about a faulting address. */ -typedef - struct { - /* ALL */ - AddrKind akind; - /* Freed, Mallocd */ - Int blksize; - /* Freed, Mallocd */ - Int rwoffset; - /* Freed, Mallocd */ - ExeContext* lastchange; - /* Stack */ - ThreadId stack_tid; - /* True if is just-below %esp -- could be a gcc bug. 
*/ - Bool maybe_gcc; - } - AddrInfo; - - -/* --------------------------------------------------------------------- - Exports of vg_clientperms.c - ------------------------------------------------------------------ */ - -extern Bool VG_(client_perm_maybe_describe)( Addr a, AddrInfo* ai ); - -extern UInt VG_(handle_client_request) ( ThreadState* tst, UInt* arg_block ); - -extern void VG_(delete_client_stack_blocks_following_ESP_change) ( void ); - -extern void VG_(show_client_block_stats) ( void ); - - -/* --------------------------------------------------------------------- - Exports of vg_procselfmaps.c - ------------------------------------------------------------------ */ - -extern -void VG_(read_procselfmaps) ( - void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) -); - - -/* --------------------------------------------------------------------- - Exports of vg_symtab2.c - ------------------------------------------------------------------ */ - -/* We assume the executable is loaded here ... can't really find - out. There is a hacky sanity check in vg_init_memory_audit() - which should trip up most stupidities. -*/ -#define VG_ASSUMED_EXE_BASE (Addr)0x8048000 - -extern void VG_(read_symbols) ( void ); -extern void VG_(mini_stack_dump) ( ExeContext* ec ); -extern void VG_(what_obj_and_fun_is_this) - ( Addr a, - Char* obj_buf, Int n_obj_buf, - Char* fun_buf, Int n_fun_buf ); -extern Bool VG_(what_line_is_this) ( Addr a, - UChar* filename, Int n_filename, - UInt* lineno ); -extern Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a, - Char* fn_name, Int n_fn_name); - -extern Bool VG_(symtab_notify_munmap) ( Addr start, UInt length ); - - -/* --------------------------------------------------------------------- - Exports of vg_clientmalloc.c - ------------------------------------------------------------------ */ - -typedef - enum { - Vg_AllocMalloc = 0, - Vg_AllocNew = 1, - Vg_AllocNewVec = 2 - } - VgAllocKind; - -/* Description of a malloc'd chunk. 
*/ -typedef - struct _ShadowChunk { - struct _ShadowChunk* next; - ExeContext* where; /* where malloc'd/free'd */ - UInt size : 30; /* size requested. */ - VgAllocKind allockind : 2; /* which wrapper did the allocation */ - Addr data; /* ptr to actual block. */ - } - ShadowChunk; - -extern void VG_(clientmalloc_done) ( void ); -extern void VG_(describe_addr) ( Addr a, AddrInfo* ai ); -extern ShadowChunk** VG_(get_malloc_shadows) ( /*OUT*/ UInt* n_shadows ); - -/* These are called from the scheduler, when it intercepts a user - request. */ -extern void* VG_(client_malloc) ( ThreadState* tst, - UInt size, VgAllocKind kind ); -extern void* VG_(client_memalign) ( ThreadState* tst, - UInt align, UInt size ); -extern void VG_(client_free) ( ThreadState* tst, - void* ptrV, VgAllocKind kind ); -extern void* VG_(client_calloc) ( ThreadState* tst, - UInt nmemb, UInt size1 ); -extern void* VG_(client_realloc) ( ThreadState* tst, - void* ptrV, UInt size_new ); - - -/* --------------------------------------------------------------------- - Exports of vg_main.c - ------------------------------------------------------------------ */ - -/* A structure used as an intermediary when passing the simulated - CPU's state to some assembly fragments, particularly system calls. - Stuff is copied from baseBlock to here, the assembly magic runs, - and then the inverse copy is done. */ - -extern UInt VG_(m_state_static) [8 /* int regs, in Intel order */ - + 1 /* %eflags */ - + 1 /* %eip */ - + VG_SIZE_OF_FPUSTATE_W /* FPU state */ - ]; - -/* Handy fns for doing the copy back and forth. */ -extern void VG_(copy_baseBlock_to_m_state_static) ( void ); -extern void VG_(copy_m_state_static_to_baseBlock) ( void ); - -/* Called when some unhandleable client behaviour is detected. - Prints a msg and aborts. */ -extern void VG_(unimplemented) ( Char* msg ); -extern void VG_(nvidia_moan) ( void ); - -/* The stack on which Valgrind runs. 
We can't use the same stack as the - simulatee -- that's an important design decision. */ -extern UInt VG_(stack)[10000]; - -/* Similarly, we have to ask for signals to be delivered on an - alternative stack, since it is possible, although unlikely, that - we'll have to run client code from inside the Valgrind-installed - signal handler. If this happens it will be done by - vg_deliver_signal_immediately(). */ -extern UInt VG_(sigstack)[10000]; - -/* Holds client's %esp at the point we gained control. From this the - client's argc, argv and envp are deduced. */ -extern Addr VG_(esp_at_startup); -extern Int VG_(client_argc); -extern Char** VG_(client_argv); -extern Char** VG_(client_envp); - -/* Remove valgrind.so from a LD_PRELOAD=... string so child processes - don't get traced into. Also mess up $libdir/valgrind so that our - libpthread.so disappears from view. */ -void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str, - Char* ld_library_path_str ); - -/* Something of a function looking for a home ... start up GDB. This - is called from VG_(swizzle_esp_then_start_GDB) and so runs on the - *client's* stack. This is necessary to give GDB the illusion that - the client program really was running on the real cpu. */ -extern void VG_(start_GDB_whilst_on_client_stack) ( void ); - -/* Spew out vast amounts of junk during JITting? */ -extern Bool VG_(disassemble); - -/* 64-bit counter for the number of basic blocks done. */ -extern ULong VG_(bbs_done); -/* 64-bit counter for the number of bbs to go before a debug exit. */ -extern ULong VG_(bbs_to_go); - -/* Counts downwards in vg_run_innerloop. */ -extern UInt VG_(dispatch_ctr); - -/* Is the client running on the simulated CPU or the real one? */ -extern Bool VG_(running_on_simd_CPU); /* Initially False */ - -/* The current LRU epoch. */ -extern UInt VG_(current_epoch); - -/* This is the ThreadId of the last thread the scheduler ran. 
*/ -extern ThreadId VG_(last_run_tid); - - -/* --- Counters, for informational purposes only. --- */ - -/* Number of lookups which miss the fast tt helper. */ -extern UInt VG_(tt_fast_misses); - -/* Counts for LRU informational messages. */ - -/* Number and total o/t size of new translations this epoch. */ -extern UInt VG_(this_epoch_in_count); -extern UInt VG_(this_epoch_in_osize); -extern UInt VG_(this_epoch_in_tsize); -/* Number and total o/t size of discarded translations this epoch. */ -extern UInt VG_(this_epoch_out_count); -extern UInt VG_(this_epoch_out_osize); -extern UInt VG_(this_epoch_out_tsize); -/* Number and total o/t size of translations overall. */ -extern UInt VG_(overall_in_count); -extern UInt VG_(overall_in_osize); -extern UInt VG_(overall_in_tsize); -/* Number and total o/t size of discards overall. */ -extern UInt VG_(overall_out_count); -extern UInt VG_(overall_out_osize); -extern UInt VG_(overall_out_tsize); - -/* The number of LRU-clearings of TT/TC. */ -extern UInt VG_(number_of_lrus); - -/* Counts pertaining to the register allocator. */ - -/* total number of uinstrs input to reg-alloc */ -extern UInt VG_(uinstrs_prealloc); - -/* total number of uinstrs added due to spill code */ -extern UInt VG_(uinstrs_spill); - -/* number of bbs requiring spill code */ -extern UInt VG_(translations_needing_spill); - -/* total of register ranks over all translations */ -extern UInt VG_(total_reg_rank); - -/* Counts pertaining to internal sanity checking. */ -extern UInt VG_(sanity_fast_count); -extern UInt VG_(sanity_slow_count); - -/* Counts pertaining to the scheduler. 
*/ -extern UInt VG_(num_scheduling_events_MINOR); -extern UInt VG_(num_scheduling_events_MAJOR); - - -/* --------------------------------------------------------------------- - Exports of vg_memory.c - ------------------------------------------------------------------ */ - -extern void VGM_(init_memory_audit) ( void ); -extern Addr VGM_(curr_dataseg_end); -extern void VG_(show_reg_tags) ( void ); -extern void VG_(detect_memory_leaks) ( void ); -extern void VG_(done_prof_mem) ( void ); - -/* Set permissions for an address range. Not speed-critical. */ -extern void VGM_(make_noaccess) ( Addr a, UInt len ); -extern void VGM_(make_writable) ( Addr a, UInt len ); -extern void VGM_(make_readable) ( Addr a, UInt len ); -/* Use with care! (read: use for shmat only) */ -extern void VGM_(make_readwritable) ( Addr a, UInt len ); -extern void VGM_(copy_address_range_perms) ( Addr src, Addr dst, - UInt len ); - -/* Check permissions for an address range. Not speed-critical. */ -extern Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); -extern Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); -extern Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); - -/* Sanity checks which may be done at any time. The scheduler decides - when. */ -extern void VG_(do_sanity_checks) ( Bool force_expensive ); -/* Very cheap ... */ -extern Bool VG_(first_and_last_secondaries_look_plausible) ( void ); - -/* These functions are called from generated code. 
*/ -extern void VG_(helperc_STOREV4) ( UInt, Addr ); -extern void VG_(helperc_STOREV2) ( UInt, Addr ); -extern void VG_(helperc_STOREV1) ( UInt, Addr ); - -extern UInt VG_(helperc_LOADV1) ( Addr ); -extern UInt VG_(helperc_LOADV2) ( Addr ); -extern UInt VG_(helperc_LOADV4) ( Addr ); - -extern void VGM_(handle_esp_assignment) ( Addr new_espA ); -extern void VGM_(fpu_write_check) ( Addr addr, Int size ); -extern void VGM_(fpu_read_check) ( Addr addr, Int size ); - -/* Safely (avoiding SIGSEGV / SIGBUS) scan the entire valid address - space and pass the addresses and values of all addressible, - defined, aligned words to notify_word. This is the basis for the - leak detector. Returns the number of calls made to notify_word. */ -UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ); - -/* Is this address within some small distance below %ESP? Used only - for the --workaround-gcc296-bugs kludge. */ -extern Bool VG_(is_just_below_ESP)( Addr esp, Addr aa ); - -/* Nasty kludgery to deal with applications which switch stacks, - like netscape. */ -#define VG_PLAUSIBLE_STACK_SIZE 8000000 - -/* Needed by the pthreads implementation. 
*/ -#define VGM_WORD_VALID 0 -#define VGM_WORD_INVALID 0xFFFFFFFF - - -/* --------------------------------------------------------------------- - Exports of vg_syscall_mem.c - ------------------------------------------------------------------ */ - -extern void VG_(perform_assumed_nonblocking_syscall) ( ThreadId tid ); - -extern void VG_(check_known_blocking_syscall) ( ThreadId tid, - Int syscallno, - Int* /*IN*/ res ); - -extern Bool VG_(is_kerror) ( Int res ); - -#define KERNEL_DO_SYSCALL(thread_id, result_lvalue) \ - VG_(load_thread_state)(thread_id); \ - VG_(copy_baseBlock_to_m_state_static)(); \ - VG_(do_syscall)(); \ - VG_(copy_m_state_static_to_baseBlock)(); \ - VG_(save_thread_state)(thread_id); \ - VG_(threads)[thread_id].sh_eax = VGM_WORD_VALID; \ - result_lvalue = VG_(threads)[thread_id].m_eax; - - -/* --------------------------------------------------------------------- - Exports of vg_transtab.c - ------------------------------------------------------------------ */ - -/* An entry in the translation table (TT). */ -typedef - struct { - /* +0 */ Addr orig_addr; - /* +4 */ Addr trans_addr; - /* +8 */ UInt mru_epoch; - /* +12 */ UShort orig_size; - /* +14 */ UShort trans_size; - } - TTEntry; - -/* The number of basic blocks in an epoch (one age-step). 
*/ -#define VG_BBS_PER_EPOCH 20000 - -extern void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ); -extern void VG_(maybe_do_lru_pass) ( void ); -extern void VG_(flush_transtab) ( void ); -extern Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size ); -extern void VG_(add_to_trans_tab) ( TTEntry* tte ); -extern void VG_(invalidate_translations) ( Addr start, UInt range ); - -extern void VG_(init_tt_tc) ( void ); - -extern void VG_(sanity_check_tc_tt) ( void ); -extern Addr VG_(search_transtab) ( Addr original_addr ); - -extern void VG_(invalidate_tt_fast)( void ); - - -/* --------------------------------------------------------------------- - Exports of vg_vtagops.c - ------------------------------------------------------------------ */ - -/* Lists the names of value-tag operations used in instrumented - code. These are the third argument to TAG1 and TAG2 uinsns. */ - -typedef - enum { - /* Unary. */ - VgT_PCast40, VgT_PCast20, VgT_PCast10, - VgT_PCast01, VgT_PCast02, VgT_PCast04, - - VgT_PCast14, VgT_PCast12, VgT_PCast11, - - VgT_Left4, VgT_Left2, VgT_Left1, - - VgT_SWiden14, VgT_SWiden24, VgT_SWiden12, - VgT_ZWiden14, VgT_ZWiden24, VgT_ZWiden12, - - /* Binary; 1st is rd; 2nd is rd+wr */ - VgT_UifU4, VgT_UifU2, VgT_UifU1, VgT_UifU0, - VgT_DifD4, VgT_DifD2, VgT_DifD1, - - VgT_ImproveAND4_TQ, VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, - VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, VgT_ImproveOR1_TQ, - VgT_DebugFn - } - VgTagOp; - -extern Char* VG_(nameOfTagOp) ( VgTagOp ); -extern UInt VG_(DebugFn) ( UInt a1, UInt a2 ); - - -/* --------------------------------------------------------------------- - Exports of vg_syscall.S - ------------------------------------------------------------------ */ - -extern void VG_(do_syscall) ( void ); - - -/* --------------------------------------------------------------------- - Exports of vg_startup.S - ------------------------------------------------------------------ */ - -extern void VG_(switch_to_real_CPU) ( void ); - 
-extern void VG_(swizzle_esp_then_start_GDB) ( Addr m_eip_at_error, - Addr m_esp_at_error, - Addr m_ebp_at_error ); - - -/* --------------------------------------------------------------------- - Exports of vg_dispatch.S - ------------------------------------------------------------------ */ - -/* Run a thread for a (very short) while, until some event happens - which means we need to defer to the scheduler. */ -extern UInt VG_(run_innerloop) ( void ); - - -/* --------------------------------------------------------------------- - Exports of vg_helpers.S - ------------------------------------------------------------------ */ - -/* Mul, div, etc, -- we don't codegen these directly. */ -extern void VG_(helper_idiv_64_32); -extern void VG_(helper_div_64_32); -extern void VG_(helper_idiv_32_16); -extern void VG_(helper_div_32_16); -extern void VG_(helper_idiv_16_8); -extern void VG_(helper_div_16_8); - -extern void VG_(helper_imul_32_64); -extern void VG_(helper_mul_32_64); -extern void VG_(helper_imul_16_32); -extern void VG_(helper_mul_16_32); -extern void VG_(helper_imul_8_16); -extern void VG_(helper_mul_8_16); - -extern void VG_(helper_CLD); -extern void VG_(helper_STD); -extern void VG_(helper_get_dirflag); - -extern void VG_(helper_CLC); -extern void VG_(helper_STC); - -extern void VG_(helper_shldl); -extern void VG_(helper_shldw); -extern void VG_(helper_shrdl); -extern void VG_(helper_shrdw); - -extern void VG_(helper_RDTSC); -extern void VG_(helper_CPUID); - -extern void VG_(helper_bsf); -extern void VG_(helper_bsr); - -extern void VG_(helper_fstsw_AX); -extern void VG_(helper_SAHF); -extern void VG_(helper_DAS); -extern void VG_(helper_DAA); - -extern void VG_(helper_value_check4_fail); -extern void VG_(helper_value_check2_fail); -extern void VG_(helper_value_check1_fail); -extern void VG_(helper_value_check0_fail); - -/* NOT A FUNCTION; this is a bogus RETURN ADDRESS. 
*/ -extern void VG_(signalreturn_bogusRA)( void ); - - -/* --------------------------------------------------------------------- - Exports of vg_cachesim.c - ------------------------------------------------------------------ */ - -extern Int VG_(log2) ( Int x ); - -extern UCodeBlock* VG_(cachesim_instrument) ( UCodeBlock* cb_in, - Addr orig_addr ); - -typedef struct _iCC iCC; -typedef struct _idCC idCC; - -extern void VG_(init_cachesim) ( void ); -extern void VG_(do_cachesim_results)( Int client_argc, Char** client_argv ); - -extern void VG_(cachesim_log_non_mem_instr)( iCC* cc ); -extern void VG_(cachesim_log_mem_instr) ( idCC* cc, Addr data_addr ); - -extern void VG_(cachesim_notify_discard) ( TTEntry* tte ); - - -/* --------------------------------------------------------------------- - The state of the simulated CPU. - ------------------------------------------------------------------ */ - -/* This is the Intel register encoding. */ -#define R_EAX 0 -#define R_ECX 1 -#define R_EDX 2 -#define R_EBX 3 -#define R_ESP 4 -#define R_EBP 5 -#define R_ESI 6 -#define R_EDI 7 - -#define R_AL (0+R_EAX) -#define R_CL (0+R_ECX) -#define R_DL (0+R_EDX) -#define R_BL (0+R_EBX) -#define R_AH (4+R_EAX) -#define R_CH (4+R_ECX) -#define R_DH (4+R_EDX) -#define R_BH (4+R_EBX) - - -/* --------------------------------------------------------------------- - Offsets into baseBlock for everything which needs to referred to - from generated code. The order of these decls does not imply - what the order of the actual offsets is. The latter is important - and is set up in vg_main.c. - ------------------------------------------------------------------ */ - -/* An array of words. In generated code, %ebp always points to the - start of this array. Useful stuff, like the simulated CPU state, - and the addresses of helper functions, can then be found by - indexing off %ebp. The following declares variables which, at - startup time, are given values denoting offsets into baseBlock. 
- These offsets are in *words* from the start of baseBlock. */ - -#define VG_BASEBLOCK_WORDS 200 - -extern UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; - - -/* ----------------------------------------------------- - Read-write parts of baseBlock. - -------------------------------------------------- */ - -/* State of the simulated CPU. */ -extern Int VGOFF_(m_eax); -extern Int VGOFF_(m_ecx); -extern Int VGOFF_(m_edx); -extern Int VGOFF_(m_ebx); -extern Int VGOFF_(m_esp); -extern Int VGOFF_(m_ebp); -extern Int VGOFF_(m_esi); -extern Int VGOFF_(m_edi); -extern Int VGOFF_(m_eflags); -extern Int VGOFF_(m_fpustate); -extern Int VGOFF_(m_eip); - -/* Reg-alloc spill area (VG_MAX_SPILLSLOTS words long). */ -extern Int VGOFF_(spillslots); - -/* Records the valid bits for the 8 integer regs & flags reg. */ -extern Int VGOFF_(sh_eax); -extern Int VGOFF_(sh_ecx); -extern Int VGOFF_(sh_edx); -extern Int VGOFF_(sh_ebx); -extern Int VGOFF_(sh_esp); -extern Int VGOFF_(sh_ebp); -extern Int VGOFF_(sh_esi); -extern Int VGOFF_(sh_edi); -extern Int VGOFF_(sh_eflags); - - -/* ----------------------------------------------------- - Read-only parts of baseBlock. - -------------------------------------------------- */ - -/* Offsets of addresses of helper functions. A "helper" function is - one which is called from generated code. 
*/ - -extern Int VGOFF_(helper_idiv_64_32); -extern Int VGOFF_(helper_div_64_32); -extern Int VGOFF_(helper_idiv_32_16); -extern Int VGOFF_(helper_div_32_16); -extern Int VGOFF_(helper_idiv_16_8); -extern Int VGOFF_(helper_div_16_8); - -extern Int VGOFF_(helper_imul_32_64); -extern Int VGOFF_(helper_mul_32_64); -extern Int VGOFF_(helper_imul_16_32); -extern Int VGOFF_(helper_mul_16_32); -extern Int VGOFF_(helper_imul_8_16); -extern Int VGOFF_(helper_mul_8_16); - -extern Int VGOFF_(helper_CLD); -extern Int VGOFF_(helper_STD); -extern Int VGOFF_(helper_get_dirflag); - -extern Int VGOFF_(helper_CLC); -extern Int VGOFF_(helper_STC); - -extern Int VGOFF_(helper_shldl); -extern Int VGOFF_(helper_shldw); -extern Int VGOFF_(helper_shrdl); -extern Int VGOFF_(helper_shrdw); - -extern Int VGOFF_(helper_RDTSC); -extern Int VGOFF_(helper_CPUID); - -extern Int VGOFF_(helper_bsf); -extern Int VGOFF_(helper_bsr); - -extern Int VGOFF_(helper_fstsw_AX); -extern Int VGOFF_(helper_SAHF); -extern Int VGOFF_(helper_DAS); -extern Int VGOFF_(helper_DAA); - -extern Int VGOFF_(helper_value_check4_fail); -extern Int VGOFF_(helper_value_check2_fail); -extern Int VGOFF_(helper_value_check1_fail); -extern Int VGOFF_(helper_value_check0_fail); - -extern Int VGOFF_(helperc_STOREV4); /* :: UInt -> Addr -> void */ -extern Int VGOFF_(helperc_STOREV2); /* :: UInt -> Addr -> void */ -extern Int VGOFF_(helperc_STOREV1); /* :: UInt -> Addr -> void */ - -extern Int VGOFF_(helperc_LOADV4); /* :: Addr -> UInt -> void */ -extern Int VGOFF_(helperc_LOADV2); /* :: Addr -> UInt -> void */ -extern Int VGOFF_(helperc_LOADV1); /* :: Addr -> UInt -> void */ - -extern Int VGOFF_(handle_esp_assignment); /* :: Addr -> void */ -extern Int VGOFF_(fpu_write_check); /* :: Addr -> Int -> void */ -extern Int VGOFF_(fpu_read_check); /* :: Addr -> Int -> void */ - -extern Int VGOFF_(cachesim_log_non_mem_instr); -extern Int VGOFF_(cachesim_log_mem_instr); - -#endif /* ndef __VG_INCLUDE_H */ - - -/* 
--------------------------------------------------------------------- - Finally - autoconf-generated settings - ------------------------------------------------------------------ */ - -#include "config.h" - -/*--------------------------------------------------------------------*/ -/*--- end vg_include.h ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_kerneliface.h b/coregrind/vg_kerneliface.h deleted file mode 100644 index bcc10f55e2..0000000000 --- a/coregrind/vg_kerneliface.h +++ /dev/null @@ -1,354 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A header file defining structures and constants which are ---*/ -/*--- important at the kernel boundary for this platform. ---*/ -/*--- vg_kerneliface.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#ifndef __VG_KERNELIFACE_H -#define __VG_KERNELIFACE_H - -/* This file is ONLY to be included into vg_include.h. 
Do not include - it directly into valgrind source .c files. This file defines types - and constants for the kernel interface, and to make that clear - everything is prefixed VKI. */ - -/*--- All the following stuff is correct for Linux kernels 2.2.X and - 2.4.X. ----*/ - -/* Should really get this from an include file somewhere. */ -#define VKI_BYTES_PER_PAGE_BITS 12 -#define VKI_BYTES_PER_PAGE (1 << VKI_BYTES_PER_PAGE_BITS) - -#define VKI_BYTES_PER_WORD 4 -#define VKI_WORDS_PER_PAGE (VKI_BYTES_PER_PAGE / VKI_BYTES_PER_WORD) - - -/* For system call numbers __NR_... */ -#include - -/* An implementation of signal sets. These are the same as the sigset - implementations in the relevant Linux kernels. Note carefully that - this has nothing to do with glibc's signal sets. We work entirely - at the kernel boundary, so the libc stuff is invisible and - irrelevant. */ - -/* The following is copied from - /usr/src/linux-2.4.9-13/include/asm-i386/signal.h */ -#define VKI_KNSIG 64 /* true for linux 2.2.X and 2.4.X */ -#define VKI_KNSIG_BPW 32 /* since we're using UInts */ -#define VKI_KNSIG_WORDS (VKI_KNSIG / VKI_KNSIG_BPW) - -typedef - struct { - UInt ws[VKI_KNSIG_WORDS]; - } - vki_ksigset_t; - - -typedef - struct { - void* ksa_handler; - unsigned long ksa_flags; - void (*ksa_restorer)(void); - vki_ksigset_t ksa_mask; - } - vki_ksigaction; - -typedef - struct { - void* ss_sp; - Int ss_flags; - UInt ss_size; - } - vki_kstack_t; - - -/* sigaltstack controls */ -#define VKI_SS_ONSTACK 1 -#define VKI_SS_DISABLE 2 - -#define VKI_MINSIGSTKSZ 2048 -#define VKI_SIGSTKSZ 8192 - - - -#define VKI_SIG_BLOCK 0 /* for blocking signals */ -#define VKI_SIG_UNBLOCK 1 /* for unblocking signals */ -#define VKI_SIG_SETMASK 2 /* for setting the signal mask */ - -#define VKI_SIG_DFL ((void*)0) /* default signal handling */ -#define VKI_SIG_IGN ((void*)1) /* ignore signal */ -#define VKI_SIG_ERR ((void*)-1) /* error return from signal */ - -#define VKI_SA_ONSTACK 0x08000000 -#define VKI_SA_RESTART 
0x10000000 -#define VKI_SA_NOCLDSTOP 0x00000001 -#define VKI_SA_RESETHAND 0x80000000 -#define VKI_SA_ONESHOT VKI_SA_RESETHAND -#define VKI_SA_NODEFER 0x40000000 -#define VKI_SA_NOMASK VKI_SA_NODEFER -#if 0 -#define VKI_SA_NOCLDWAIT 0x00000002 /* not supported yet */ -#define VKI_SA_SIGINFO 0x00000004 -#define VKI_SA_INTERRUPT 0x20000000 /* dummy -- ignored */ -#define VKI_SA_RESTORER 0x04000000 -#endif - -#define VKI_SIGSEGV 11 -#define VKI_SIGBUS 7 -#define VKI_SIGILL 4 -#define VKI_SIGFPE 8 -#define VKI_SIGKILL 9 -#define VKI_SIGSTOP 19 -#define VKI_SIGTERM 15 -#define VKI_SIGUSR1 10 - -/* The following are copied from include/asm-i386/mman.h .*/ - -#define VKI_PROT_READ 0x1 /* Page can be read. */ -#define VKI_PROT_WRITE 0x2 /* Page can be written. */ -#define VKI_PROT_EXEC 0x4 /* Page can be executed. */ -#define VKI_MAP_ANONYMOUS 0x20 /* Don't use a file. */ -#define VKI_MAP_PRIVATE 0x02 /* Changes are private. */ -#define VKI_MAP_FIXED 0x10 /* Interpret addr exactly */ - - -/* Copied from /usr/src/linux-2.4.9-13/include/asm/errno.h */ - -#define VKI_EPERM 1 /* Operation not permitted */ -#define VKI_EINTR 4 /* Interrupted system call */ -#define VKI_EINVAL 22 /* Invalid argument */ -#define VKI_ENOMEM 12 /* Out of memory */ -#define VKI_EFAULT 14 /* Bad address */ -#define VKI_ESRCH 3 /* No such process */ - -#define VKI_EWOULDBLOCK VKI_EAGAIN /* Operation would block */ -#define VKI_EAGAIN 11 /* Try again */ - - -/* Gawd ... hack ... */ - -typedef struct vki__user_cap_header_struct { - UInt version; - int pid; -} vki_cap_user_header_t; - -typedef struct vki__user_cap_data_struct { - UInt effective; - UInt permitted; - UInt inheritable; -} vki_cap_user_data_t; - - -/* "Byrial Jensen" says: - [various] ioctls take a pointer to a "struct - termios" but this is another and shorter "struct - termios" than the one defined in and used - by tcgetattr(3) and tcsetattr(3) and other library - functions. 
GNU libc translate between its library - termios and the kernel termios. -*/ - -#define VKI_SIZEOF_STRUCT_TERMIOS 36 - -/* Adam Gundy , 20 Mar 2002, says: */ -#define VKI_SIZEOF_STRUCT_TERMIO 17 - - -/* File descriptor sets, for doing select(). Copied from - /usr/src/linux-2.4.9-31/include/linux/posix_types.h -*/ -/* - * This allows for 1024 file descriptors: if NR_OPEN is ever grown - * beyond that you'll have to change this too. But 1024 fd's seem to be - * enough even for such "real" unices like OSF/1, so hopefully this is - * one limit that doesn't have to be changed [again]. - * - * Note that POSIX wants the FD_CLEAR(fd,fdsetp) defines to be in - * (and thus ) - but this is a more logical - * place for them. Solved by having dummy defines in . - */ - -/* - * Those macros may have been defined in . But we always - * use the ones here. - */ -#undef VKI_NFDBITS -#define VKI_NFDBITS (8 * sizeof(unsigned long)) - -#undef VKI_FD_SETSIZE -#define VKI_FD_SETSIZE 1024 - -#undef VKI_FDSET_LONGS -#define VKI_FDSET_LONGS (VKI_FD_SETSIZE/VKI_NFDBITS) - -#undef VKI_FDELT -#define VKI_FDELT(d) ((d) / VKI_NFDBITS) - -#undef VKI_FDMASK -#define VKI_FDMASK(d) (1UL << ((d) % VKI_NFDBITS)) - -typedef struct { - unsigned long vki_fds_bits [VKI_FDSET_LONGS]; -} vki_fd_set; - - -/* Gawd ... 
- Copied from /usr/src/linux-2.4.9-31/./include/asm-i386/posix_types.h -*/ -#undef VKI_FD_SET -#define VKI_FD_SET(fd,fdsetp) \ - __asm__ __volatile__("btsl %1,%0": \ - "=m" (*(vki_fd_set *) (fdsetp)):"r" ((int) (fd))) - -#undef VKI_FD_CLR -#define VKI_FD_CLR(fd,fdsetp) \ - __asm__ __volatile__("btrl %1,%0": \ - "=m" (*(vki_fd_set *) (fdsetp)):"r" ((int) (fd))) - -#undef VKI_FD_ISSET -#define VKI_FD_ISSET(fd,fdsetp) (__extension__ ({ \ - unsigned char __result; \ - __asm__ __volatile__("btl %1,%2 ; setb %0" \ - :"=q" (__result) :"r" ((int) (fd)), \ - "m" (*(vki_fd_set *) (fdsetp))); \ - __result; })) - -#undef VKI_FD_ZERO -#define VKI_FD_ZERO(fdsetp) \ -do { \ - int __d0, __d1; \ - __asm__ __volatile__("cld ; rep ; stosl" \ - :"=m" (*(vki_fd_set *) (fdsetp)), \ - "=&c" (__d0), "=&D" (__d1) \ - :"a" (0), "1" (VKI_FDSET_LONGS), \ - "2" ((vki_fd_set *) (fdsetp)) : "memory"); \ -} while (0) - - - -/* -./include/asm-i386/posix_types.h:typedef long __kernel_suseconds_t; -./include/linux/types.h:typedef __kernel_suseconds_t suseconds_t; - -./include/asm-i386/posix_types.h:typedef long __kernel_time_t; -./include/linux/types.h:typedef __kernel_time_t time_t; -*/ - -struct vki_timeval { - /* time_t */ long tv_sec; /* seconds */ - /* suseconds_t */ long tv_usec; /* microseconds */ -}; - - - -/* For fcntl on fds .. - from ./include/asm-i386/fcntl.h */ -#define VKI_F_GETFL 3 /* get file->f_flags */ -#define VKI_F_SETFL 4 /* set file->f_flags */ - -#define VKI_O_NONBLOCK 04000 - -/* For nanosleep ... 
- from ./include/linux/time.h */ -struct vki_timespec { - /* time_t */ long tv_sec; /* seconds */ - long tv_nsec; /* nanoseconds */ -}; - - -/* STAT stuff - from /usr/src/linux-2.4.9-31/include/asm-i386/stat.h */ -struct vki_stat { - unsigned short st_dev; - unsigned short __pad1; - unsigned long st_ino; - unsigned short st_mode; - unsigned short st_nlink; - unsigned short st_uid; - unsigned short st_gid; - unsigned short st_rdev; - unsigned short __pad2; - unsigned long st_size; - unsigned long st_blksize; - unsigned long st_blocks; - unsigned long st_atime; - unsigned long __unused1; - unsigned long st_mtime; - unsigned long __unused2; - unsigned long st_ctime; - unsigned long __unused3; - unsigned long __unused4; - unsigned long __unused5; -}; - - -/* To do with the ELF frame constructed by the kernel on a process' - stack just before it transfers control to the program's interpreter - (to use the ELF parlance). - Constants from /usr/src/linux-2.4.9-31/include/linux/elf.h - Logic from /usr/src/linux-2.4.9-31/fs/binfmt_elf.c - and its counterpart in the 2.2.14 kernel sources - in Red Hat 6.2. */ -#define VKI_AT_CLKTCK 17 /* frequency at which times() increments */ -#define VKI_AT_HWCAP 16 /* arch dependent hints at CPU capabilities */ -#define VKI_AT_BASE 7 /* base address of interpreter */ -#define VKI_AT_PAGESZ 6 /* system page size */ -#define VKI_AT_PHNUM 5 /* number of program headers */ -#define VKI_AT_PHENT 4 /* size of program header entry */ -#define VKI_AT_PHDR 3 /* program headers for program */ -#define VKI_AT_USER_AUX_SEGMENT 23 /* tell glibc what address segment - 0x3B points to. (Needed for - Red Hat Limbo, 7.3.92) */ - -/* Including leads to loads of hassle because then we - need sometimes (RedHat 7.3) and that is a - kernel-only header which deliberately #errors on gcc-3.1. Mucho - hassle considering that we only want to know sizeof(struct module). - Hence ... 
- - #include - #include - #include - - int main ( void ) - { - printf ("sizeof(struct module) = %d\n", sizeof(struct module) ); - return 0; - } -*/ - -#define VKI_SIZEOF_STRUCT_MODULE 96 - -#endif /* ndef __VG_KERNELIFACE_H */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_kerneliface.h ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_libpthread.c b/coregrind/vg_libpthread.c deleted file mode 100644 index 60c4dc95ff..0000000000 --- a/coregrind/vg_libpthread.c +++ /dev/null @@ -1,2850 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A replacement for the standard libpthread.so. ---*/ -/*--- vg_libpthread.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -/* ALL THIS CODE RUNS ON THE SIMULATED CPU. - - This is a replacement for the standard libpthread.so. 
It is loaded - as part of the client's image (if required) and directs pthread - calls through to Valgrind's request mechanism. - - A couple of caveats. - - 1. Since it's a binary-compatible replacement for an existing library, - we must take care to used exactly the same data layouts, etc, as - the standard pthread.so does. - - 2. Since this runs as part of the client, there are no specific - restrictions on what headers etc we can include, so long as - this libpthread.so does not end up having dependencies on .so's - which the real one doesn't. - - Later ... it appears we cannot call file-related stuff in libc here, - perhaps fair enough. Be careful what you call from here. Even exit() - doesn't work (gives infinite recursion and then stack overflow); hence - myexit(). Also fprintf doesn't seem safe. -*/ - -#include "valgrind.h" /* For the request-passing mechanism */ -#include "vg_include.h" /* For the VG_USERREQ__* constants */ - -#define __USE_UNIX98 -#include -#include -#undef __USE_UNIX98 - -#include -#include -#ifdef GLIBC_2_1 -#include -#endif - -#include - - -/* --------------------------------------------------------------------- - Forwardses. - ------------------------------------------------------------------ */ - -static void wait_for_fd_to_be_readable_or_erring ( int fd ); - -static -int my_do_syscall2 ( int syscallno, - int arg1, int arg2 ); - - -/* --------------------------------------------------------------------- - Helpers. We have to be pretty self-sufficient. - ------------------------------------------------------------------ */ - -/* Number of times any given error message is printed. */ -#define N_MOANS 3 - -/* Extract from Valgrind the value of VG_(clo_trace_pthread_level). - Returns 0 (none) if not running on Valgrind. 
*/ -static -int get_pt_trace_level ( void ) -{ - int res; - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__GET_PTHREAD_TRACE_LEVEL, - 0, 0, 0, 0); - return res; -} - - -static -void my_exit ( int arg ) -{ - int __res; - __asm__ volatile ("movl %%ecx, %%ebx ; int $0x80" - : "=a" (__res) - : "0" (__NR_exit), - "c" (arg) ); - /* We don't bother to mention the fact that this asm trashes %ebx, - since it won't return. If you ever do let it return ... fix - this! */ -} - - -/* We need this guy -- it's in valgrind.so. */ -extern void VG_(startup) ( void ); - - -/* Just start up Valgrind if it's not already going. VG_(startup)() - detects and ignores second and subsequent calls. */ -static __inline__ -void ensure_valgrind ( char* caller ) -{ - VG_(startup)(); -} - -/* While we're at it ... hook our own startup function into this - game. */ -__asm__ ( - ".section .init\n" - "\tcall vgPlain_startup" -); - - -static -__attribute__((noreturn)) -void barf ( char* str ) -{ - char buf[100]; - buf[0] = 0; - strcat(buf, "\nvalgrind's libpthread.so: "); - strcat(buf, str); - strcat(buf, "\n\n"); - write(2, buf, strlen(buf)); - my_exit(1); - /* We have to persuade gcc into believing this doesn't return. 
*/ - while (1) { }; -} - - -static void ignored ( char* msg ) -{ - if (get_pt_trace_level() >= 0) { - char* ig = "valgrind's libpthread.so: IGNORED call to: "; - write(2, ig, strlen(ig)); - write(2, msg, strlen(msg)); - ig = "\n"; - write(2, ig, strlen(ig)); - } -} - -static void kludged ( char* msg ) -{ - if (get_pt_trace_level() >= 0) { - char* ig = "valgrind's libpthread.so: KLUDGED call to: "; - write(2, ig, strlen(ig)); - write(2, msg, strlen(msg)); - ig = "\n"; - write(2, ig, strlen(ig)); - } -} - -static void not_inside ( char* msg ) -{ - VG_(startup)(); -} - -__attribute__((noreturn)) -void vgPlain_unimp ( char* what ) -{ - char* ig = "valgrind's libpthread.so: UNIMPLEMENTED FUNCTION: "; - write(2, ig, strlen(ig)); - write(2, what, strlen(what)); - ig = "\n"; - write(2, ig, strlen(ig)); - barf("Please report this bug to me at: jseward@acm.org"); -} - - -static -void my_assert_fail ( Char* expr, Char* file, Int line, Char* fn ) -{ - static Bool entered = False; - if (entered) - my_exit(2); - entered = True; - fprintf(stderr, "\n%s: %s:%d (%s): Assertion `%s' failed.\n", - "valgrind", file, line, fn, expr ); - fprintf(stderr, "Please report this bug to me at: %s\n\n", - VG_EMAIL_ADDR); - my_exit(1); -} - -#define MY__STRING(__str) #__str - -#define my_assert(expr) \ - ((void) ((expr) ? 0 : \ - (my_assert_fail (MY__STRING(expr), \ - __FILE__, __LINE__, \ - __PRETTY_FUNCTION__), 0))) - - -/* --------------------------------------------------------------------- - Pass pthread_ calls to Valgrind's request mechanism. - ------------------------------------------------------------------ */ - -#include -#include /* gettimeofday */ - - -/* --------------------------------------------------- - Ummm .. 
- ------------------------------------------------ */ - -static -void pthread_error ( const char* msg ) -{ - int res; - VALGRIND_MAGIC_SEQUENCE(res, 0, - VG_USERREQ__PTHREAD_ERROR, - msg, 0, 0, 0); -} - - -/* --------------------------------------------------- - THREAD ATTRIBUTES - ------------------------------------------------ */ - -int pthread_attr_init(pthread_attr_t *attr) -{ - /* Just initialise the fields which we might look at. */ - attr->__detachstate = PTHREAD_CREATE_JOINABLE; - return 0; -} - -int pthread_attr_setdetachstate(pthread_attr_t *attr, int detachstate) -{ - if (detachstate != PTHREAD_CREATE_JOINABLE - && detachstate != PTHREAD_CREATE_DETACHED) { - pthread_error("pthread_attr_setdetachstate: " - "detachstate is invalid"); - return EINVAL; - } - attr->__detachstate = detachstate; - return 0; -} - -int pthread_attr_setinheritsched(pthread_attr_t *attr, int inherit) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_setinheritsched"); - return 0; -} - -__attribute__((weak)) -int pthread_attr_setstacksize (pthread_attr_t *__attr, - size_t __stacksize) -{ - size_t limit; - char buf[1024]; - ensure_valgrind("pthread_attr_setstacksize"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - if (__stacksize < limit) - return 0; - snprintf(buf, sizeof(buf), "pthread_attr_setstacksize: " - "requested size %d >= VG_PTHREAD_STACK_SIZE\n " - "edit vg_include.h and rebuild.", __stacksize); - buf[sizeof(buf)-1] = '\0'; /* Make sure it is zero terminated */ - barf(buf); -} - - -/* This is completely bogus. 
*/ -int pthread_attr_getschedparam(const pthread_attr_t *attr, - struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - kludged("pthread_attr_getschedparam"); -# ifdef HAVE_SCHED_PRIORITY - if (param) param->sched_priority = 0; /* who knows */ -# else - if (param) param->__sched_priority = 0; /* who knows */ -# endif - return 0; -} - -int pthread_attr_setschedparam(pthread_attr_t *attr, - const struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_setschedparam"); - return 0; -} - -int pthread_attr_destroy(pthread_attr_t *attr) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_attr_destroy"); - return 0; -} - -/* These are no-ops, as with LinuxThreads. */ -int pthread_attr_setscope ( pthread_attr_t *attr, int scope ) -{ - ensure_valgrind("pthread_attr_setscope"); - if (scope == PTHREAD_SCOPE_SYSTEM) - return 0; - pthread_error("pthread_attr_setscope: " - "invalid or unsupported scope"); - if (scope == PTHREAD_SCOPE_PROCESS) - return ENOTSUP; - return EINVAL; -} - -int pthread_attr_getscope ( const pthread_attr_t *attr, int *scope ) -{ - ensure_valgrind("pthread_attr_setscope"); - if (scope) - *scope = PTHREAD_SCOPE_SYSTEM; - return 0; -} - - -/* Pretty bogus. Avoid if possible. 
*/ -int pthread_getattr_np (pthread_t thread, pthread_attr_t *attr) -{ - int detached; - size_t limit; - ensure_valgrind("pthread_getattr_np"); - kludged("pthread_getattr_np"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - attr->__detachstate = PTHREAD_CREATE_JOINABLE; - attr->__schedpolicy = SCHED_OTHER; - attr->__schedparam.sched_priority = 0; - attr->__inheritsched = PTHREAD_EXPLICIT_SCHED; - attr->__scope = PTHREAD_SCOPE_SYSTEM; - attr->__guardsize = VKI_BYTES_PER_PAGE; - attr->__stackaddr = NULL; - attr->__stackaddr_set = 0; - attr->__stacksize = limit; - VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 2 /* get */, thread, 0, 0); - my_assert(detached == 0 || detached == 1); - if (detached) - attr->__detachstate = PTHREAD_CREATE_DETACHED; - return 0; -} - - -/* Bogus ... */ -int pthread_attr_getstackaddr ( const pthread_attr_t * attr, - void ** stackaddr ) -{ - ensure_valgrind("pthread_attr_getstackaddr"); - kludged("pthread_attr_getstackaddr"); - if (stackaddr) - *stackaddr = NULL; - return 0; -} - -/* Not bogus (!) */ -int pthread_attr_getstacksize ( const pthread_attr_t * _attr, - size_t * __stacksize ) -{ - size_t limit; - ensure_valgrind("pthread_attr_getstacksize"); - limit = VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB - - 1000; /* paranoia */ - if (__stacksize) - *__stacksize = limit; - return 0; -} - -int pthread_attr_setschedpolicy(pthread_attr_t *attr, int policy) -{ - if (policy != SCHED_OTHER && policy != SCHED_FIFO && policy != SCHED_RR) - return EINVAL; - attr->__schedpolicy = policy; - return 0; -} - -int pthread_attr_getschedpolicy(const pthread_attr_t *attr, int *policy) -{ - *policy = attr->__schedpolicy; - return 0; -} - - -/* --------------------------------------------------- - Helper functions for running a thread - and for clearing up afterwards. 
- ------------------------------------------------ */ - -/* All exiting threads eventually pass through here, bearing the - return value, or PTHREAD_CANCELED, in ret_val. */ -static -__attribute__((noreturn)) -void thread_exit_wrapper ( void* ret_val ) -{ - int detached, res; - CleanupEntry cu; - pthread_key_t key; - - /* Run this thread's cleanup handlers. */ - while (1) { - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__CLEANUP_POP, - &cu, 0, 0, 0); - if (res == -1) break; /* stack empty */ - my_assert(res == 0); - if (0) printf("running exit cleanup handler"); - cu.fn ( cu.arg ); - } - - /* Run this thread's key finalizers. Really this should be run - PTHREAD_DESTRUCTOR_ITERATIONS times. */ - for (key = 0; key < VG_N_THREAD_KEYS; key++) { - VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */, - VG_USERREQ__GET_KEY_D_AND_S, - key, &cu, 0, 0 ); - if (res == 0) { - /* valid key */ - if (cu.fn && cu.arg) - cu.fn /* destructor for key */ - ( cu.arg /* specific for key for this thread */ ); - continue; - } - my_assert(res == -1); - } - - /* Decide on my final disposition. */ - VALGRIND_MAGIC_SEQUENCE(detached, (-1) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 2 /* get */, pthread_self(), 0, 0); - my_assert(detached == 0 || detached == 1); - - if (detached) { - /* Detached; I just quit right now. */ - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__QUIT, 0, 0, 0, 0); - } else { - /* Not detached; so I wait for a joiner. */ - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__WAIT_JOINER, ret_val, 0, 0, 0); - } - /* NOTREACHED */ - barf("thread_exit_wrapper: still alive?!"); -} - - -/* This function is a wrapper function for running a thread. It runs - the root function specified in pthread_create, and then, should the - root function return a value, it arranges to run the thread's - cleanup handlers and exit correctly. */ - -/* Struct used to convey info from pthread_create to thread_wrapper. 
- Must be careful not to pass to the child thread any pointers to - objects which might be on the parent's stack. */ -typedef - struct { - int attr__detachstate; - void* (*root_fn) ( void* ); - void* arg; - } - NewThreadInfo; - - -/* This is passed to the VG_USERREQ__APPLY_IN_NEW_THREAD and so must - not return. Note that this runs in the new thread, not the - parent. */ -static -__attribute__((noreturn)) -void thread_wrapper ( NewThreadInfo* info ) -{ - int res; - int attr__detachstate; - void* (*root_fn) ( void* ); - void* arg; - void* ret_val; - - attr__detachstate = info->attr__detachstate; - root_fn = info->root_fn; - arg = info->arg; - - /* Free up the arg block that pthread_create malloced. */ - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__FREE, info, 0, 0, 0); - my_assert(res == 0); - - /* Minimally observe the attributes supplied. */ - if (attr__detachstate != PTHREAD_CREATE_DETACHED - && attr__detachstate != PTHREAD_CREATE_JOINABLE) - pthread_error("thread_wrapper: invalid attr->__detachstate"); - if (attr__detachstate == PTHREAD_CREATE_DETACHED) - pthread_detach(pthread_self()); - - /* The root function might not return. But if it does we simply - move along to thread_exit_wrapper. All other ways out for the - thread (cancellation, or calling pthread_exit) lead there - too. */ - ret_val = root_fn(arg); - thread_exit_wrapper(ret_val); - /* NOTREACHED */ -} - - -/* --------------------------------------------------- - THREADs - ------------------------------------------------ */ - -__attribute__((weak)) -int pthread_yield ( void ) -{ - int res; - ensure_valgrind("pthread_yield"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_YIELD, 0, 0, 0, 0); - return 0; -} - - -int pthread_equal(pthread_t thread1, pthread_t thread2) -{ - return thread1 == thread2 ? 1 : 0; -} - - -/* Bundle up the args into a malloc'd block and create a new thread - consisting of thread_wrapper() applied to said malloc'd block. 
*/ -int -pthread_create (pthread_t *__restrict __thredd, - __const pthread_attr_t *__restrict __attr, - void *(*__start_routine) (void *), - void *__restrict __arg) -{ - int tid_child; - NewThreadInfo* info; - - ensure_valgrind("pthread_create"); - - /* Allocate space for the arg block. thread_wrapper will free - it. */ - VALGRIND_MAGIC_SEQUENCE(info, NULL /* default */, - VG_USERREQ__MALLOC, - sizeof(NewThreadInfo), 0, 0, 0); - my_assert(info != NULL); - - if (__attr) - info->attr__detachstate = __attr->__detachstate; - else - info->attr__detachstate = PTHREAD_CREATE_JOINABLE; - - info->root_fn = __start_routine; - info->arg = __arg; - VALGRIND_MAGIC_SEQUENCE(tid_child, VG_INVALID_THREADID /* default */, - VG_USERREQ__APPLY_IN_NEW_THREAD, - &thread_wrapper, info, 0, 0); - my_assert(tid_child != VG_INVALID_THREADID); - - if (__thredd) - *__thredd = tid_child; - return 0; /* success */ -} - - -int -pthread_join (pthread_t __th, void **__thread_return) -{ - int res; - ensure_valgrind("pthread_join"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_JOIN, - __th, __thread_return, 0, 0); - return res; -} - - -void pthread_exit(void *retval) -{ - ensure_valgrind("pthread_exit"); - /* Simple! */ - thread_exit_wrapper(retval); -} - - -pthread_t pthread_self(void) -{ - int tid; - ensure_valgrind("pthread_self"); - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - if (tid < 1 || tid >= VG_N_THREADS) - barf("pthread_self: invalid ThreadId"); - return tid; -} - - -int pthread_detach(pthread_t th) -{ - int res; - ensure_valgrind("pthread_detach"); - /* First we enquire as to the current detach state. 
*/ - VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 2 /* get */, th, 0, 0); - if (res == -1) { - /* not found */ - pthread_error("pthread_detach: " - "invalid target thread"); - return ESRCH; - } - if (res == 1) { - /* already detached */ - pthread_error("pthread_detach: " - "target thread is already detached"); - return EINVAL; - } - if (res == 0) { - VALGRIND_MAGIC_SEQUENCE(res, (-2) /* default */, - VG_USERREQ__SET_OR_GET_DETACH, - 1 /* set */, th, 0, 0); - my_assert(res == 0); - return 0; - } - barf("pthread_detach"); -} - - -/* --------------------------------------------------- - CLEANUP STACKS - ------------------------------------------------ */ - -void _pthread_cleanup_push (struct _pthread_cleanup_buffer *__buffer, - void (*__routine) (void *), - void *__arg) -{ - int res; - CleanupEntry cu; - ensure_valgrind("_pthread_cleanup_push"); - cu.fn = __routine; - cu.arg = __arg; - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__CLEANUP_PUSH, - &cu, 0, 0, 0); - my_assert(res == 0); -} - - -void _pthread_cleanup_push_defer (struct _pthread_cleanup_buffer *__buffer, - void (*__routine) (void *), - void *__arg) -{ - /* As _pthread_cleanup_push, but first save the thread's original - cancellation type in __buffer and set it to Deferred. */ - int orig_ctype; - ensure_valgrind("_pthread_cleanup_push_defer"); - /* Set to Deferred, and put the old cancellation type in res. */ - my_assert(-1 != PTHREAD_CANCEL_DEFERRED); - my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS); - my_assert(sizeof(struct _pthread_cleanup_buffer) >= sizeof(int)); - VALGRIND_MAGIC_SEQUENCE(orig_ctype, (-1) /* default */, - VG_USERREQ__SET_CANCELTYPE, - PTHREAD_CANCEL_DEFERRED, 0, 0, 0); - my_assert(orig_ctype != -1); - *((int*)(__buffer)) = orig_ctype; - /* Now push the cleanup. 
*/ - _pthread_cleanup_push(NULL, __routine, __arg); -} - - -void _pthread_cleanup_pop (struct _pthread_cleanup_buffer *__buffer, - int __execute) -{ - int res; - CleanupEntry cu; - ensure_valgrind("_pthread_cleanup_push"); - cu.fn = cu.arg = NULL; /* paranoia */ - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__CLEANUP_POP, - &cu, 0, 0, 0); - if (res == 0) { - /* pop succeeded */ - if (__execute) { - cu.fn ( cu.arg ); - } - return; - } - if (res == -1) { - /* stack underflow */ - return; - } - barf("_pthread_cleanup_pop"); -} - - -void _pthread_cleanup_pop_restore (struct _pthread_cleanup_buffer *__buffer, - int __execute) -{ - int orig_ctype, fake_ctype; - /* As _pthread_cleanup_pop, but after popping/running the handler, - restore the thread's original cancellation type from the first - word of __buffer. */ - _pthread_cleanup_pop(NULL, __execute); - orig_ctype = *((int*)(__buffer)); - my_assert(orig_ctype == PTHREAD_CANCEL_DEFERRED - || orig_ctype == PTHREAD_CANCEL_ASYNCHRONOUS); - my_assert(-1 != PTHREAD_CANCEL_DEFERRED); - my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS); - my_assert(sizeof(struct _pthread_cleanup_buffer) >= sizeof(int)); - VALGRIND_MAGIC_SEQUENCE(fake_ctype, (-1) /* default */, - VG_USERREQ__SET_CANCELTYPE, - orig_ctype, 0, 0, 0); - my_assert(fake_ctype == PTHREAD_CANCEL_DEFERRED); -} - - -/* --------------------------------------------------- - MUTEX ATTRIBUTES - ------------------------------------------------ */ - -int __pthread_mutexattr_init(pthread_mutexattr_t *attr) -{ - attr->__mutexkind = PTHREAD_MUTEX_ERRORCHECK_NP; - return 0; -} - -int __pthread_mutexattr_settype(pthread_mutexattr_t *attr, int type) -{ - switch (type) { -# ifndef GLIBC_2_1 - case PTHREAD_MUTEX_TIMED_NP: - case PTHREAD_MUTEX_ADAPTIVE_NP: -# endif -# ifdef GLIBC_2_1 - case PTHREAD_MUTEX_FAST_NP: -# endif - case PTHREAD_MUTEX_RECURSIVE_NP: - case PTHREAD_MUTEX_ERRORCHECK_NP: - attr->__mutexkind = type; - return 0; - default: - 
pthread_error("pthread_mutexattr_settype: " - "invalid type"); - return EINVAL; - } -} - -int __pthread_mutexattr_destroy(pthread_mutexattr_t *attr) -{ - return 0; -} - - -/* --------------------------------------------------- - MUTEXes - ------------------------------------------------ */ - -int __pthread_mutex_init(pthread_mutex_t *mutex, - const pthread_mutexattr_t *mutexattr) -{ - mutex->__m_count = 0; - mutex->__m_owner = (_pthread_descr)VG_INVALID_THREADID; - mutex->__m_kind = PTHREAD_MUTEX_ERRORCHECK_NP; - if (mutexattr) - mutex->__m_kind = mutexattr->__mutexkind; - return 0; -} - - -int __pthread_mutex_lock(pthread_mutex_t *mutex) -{ - int res; - static int moans = N_MOANS; - if (RUNNING_ON_VALGRIND) { - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_MUTEX_LOCK, - mutex, 0, 0, 0); - return res; - } else { - if (moans-- > 0) - not_inside("pthread_mutex_lock"); - return 0; /* success */ - } -} - - -int __pthread_mutex_trylock(pthread_mutex_t *mutex) -{ - int res; - static int moans = N_MOANS; - if (RUNNING_ON_VALGRIND) { - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_MUTEX_TRYLOCK, - mutex, 0, 0, 0); - return res; - } else { - if (moans-- > 0) - not_inside("pthread_mutex_trylock"); - return 0; - } -} - - -int __pthread_mutex_unlock(pthread_mutex_t *mutex) -{ - int res; - static int moans = N_MOANS; - if (RUNNING_ON_VALGRIND) { - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_MUTEX_UNLOCK, - mutex, 0, 0, 0); - return res; - } else { - if (moans-- > 0) - not_inside("pthread_mutex_unlock"); - return 0; - } -} - - -int __pthread_mutex_destroy(pthread_mutex_t *mutex) -{ - /* Valgrind doesn't hold any resources on behalf of the mutex, so no - need to involve it. 
*/ - if (mutex->__m_count > 0) { - pthread_error("pthread_mutex_destroy: " - "mutex is still in use"); - return EBUSY; - } - mutex->__m_count = 0; - mutex->__m_owner = (_pthread_descr)VG_INVALID_THREADID; - mutex->__m_kind = PTHREAD_MUTEX_ERRORCHECK_NP; - return 0; -} - - -/* --------------------------------------------------- - CONDITION VARIABLES - ------------------------------------------------ */ - -/* LinuxThreads supports no attributes for conditions. Hence ... */ - -int pthread_condattr_init(pthread_condattr_t *attr) -{ - return 0; -} - -int pthread_condattr_destroy(pthread_condattr_t *attr) -{ - return 0; -} - -int pthread_cond_init( pthread_cond_t *cond, - const pthread_condattr_t *cond_attr) -{ - cond->__c_waiting = (_pthread_descr)VG_INVALID_THREADID; - return 0; -} - -int pthread_cond_destroy(pthread_cond_t *cond) -{ - /* should check that no threads are waiting on this CV */ - static int moans = N_MOANS; - if (moans-- > 0) - kludged("pthread_cond_destroy"); - return 0; -} - -/* --------------------------------------------------- - SCHEDULING - ------------------------------------------------ */ - -/* This is completely bogus. 
*/ -int pthread_getschedparam(pthread_t target_thread, - int *policy, - struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - kludged("pthread_getschedparam"); - if (policy) *policy = SCHED_OTHER; -# ifdef HAVE_SCHED_PRIORITY - if (param) param->sched_priority = 0; /* who knows */ -# else - if (param) param->__sched_priority = 0; /* who knows */ -# endif - return 0; -} - -int pthread_setschedparam(pthread_t target_thread, - int policy, - const struct sched_param *param) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_setschedparam"); - return 0; -} - -int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex) -{ - int res; - ensure_valgrind("pthread_cond_wait"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_WAIT, - cond, mutex, 0, 0); - return res; -} - -int pthread_cond_timedwait ( pthread_cond_t *cond, - pthread_mutex_t *mutex, - const struct timespec *abstime ) -{ - int res; - unsigned int ms_now, ms_end; - struct timeval timeval_now; - unsigned long long int ull_ms_now_after_1970; - unsigned long long int ull_ms_end_after_1970; - - ensure_valgrind("pthread_cond_timedwait"); - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - res = gettimeofday(&timeval_now, NULL); - my_assert(res == 0); - - ull_ms_now_after_1970 - = 1000ULL * ((unsigned long long int)(timeval_now.tv_sec)) - + ((unsigned long long int)(timeval_now.tv_usec / 1000000)); - ull_ms_end_after_1970 - = 1000ULL * ((unsigned long long int)(abstime->tv_sec)) - + ((unsigned long long int)(abstime->tv_nsec / 1000000)); - if (ull_ms_end_after_1970 < ull_ms_now_after_1970) - ull_ms_end_after_1970 = ull_ms_now_after_1970; - ms_end - = ms_now + (unsigned int)(ull_ms_end_after_1970 - ull_ms_now_after_1970); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_TIMEDWAIT, - cond, mutex, ms_end, 0); - return 
res; -} - - -int pthread_cond_signal(pthread_cond_t *cond) -{ - int res; - ensure_valgrind("pthread_cond_signal"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_SIGNAL, - cond, 0, 0, 0); - return res; -} - -int pthread_cond_broadcast(pthread_cond_t *cond) -{ - int res; - ensure_valgrind("pthread_cond_broadcast"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_COND_BROADCAST, - cond, 0, 0, 0); - return res; -} - - -/* --------------------------------------------------- - CANCELLATION - ------------------------------------------------ */ - -int pthread_setcancelstate(int state, int *oldstate) -{ - int res; - ensure_valgrind("pthread_setcancelstate"); - if (state != PTHREAD_CANCEL_ENABLE - && state != PTHREAD_CANCEL_DISABLE) { - pthread_error("pthread_setcancelstate: " - "invalid state"); - return EINVAL; - } - my_assert(-1 != PTHREAD_CANCEL_ENABLE); - my_assert(-1 != PTHREAD_CANCEL_DISABLE); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELSTATE, - state, 0, 0, 0); - my_assert(res != -1); - if (oldstate) - *oldstate = res; - return 0; -} - -int pthread_setcanceltype(int type, int *oldtype) -{ - int res; - ensure_valgrind("pthread_setcanceltype"); - if (type != PTHREAD_CANCEL_DEFERRED - && type != PTHREAD_CANCEL_ASYNCHRONOUS) { - pthread_error("pthread_setcanceltype: " - "invalid type"); - return EINVAL; - } - my_assert(-1 != PTHREAD_CANCEL_DEFERRED); - my_assert(-1 != PTHREAD_CANCEL_ASYNCHRONOUS); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELTYPE, - type, 0, 0, 0); - my_assert(res != -1); - if (oldtype) - *oldtype = res; - return 0; -} - -int pthread_cancel(pthread_t thread) -{ - int res; - ensure_valgrind("pthread_cancel"); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__SET_CANCELPEND, - thread, &thread_exit_wrapper, 0, 0); - my_assert(res != -1); - return res; -} - -static __inline__ -void __my_pthread_testcancel(void) -{ - int res; - 
VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__TESTCANCEL, - 0, 0, 0, 0); - my_assert(res == 0); -} - -void pthread_testcancel ( void ) -{ - __my_pthread_testcancel(); -} - - -/* Not really sure what this is for. I suspect for doing the POSIX - requirements for fork() and exec(). We do this internally anyway - whenever those syscalls are observed, so this could be superfluous, - but hey ... -*/ -void __pthread_kill_other_threads_np ( void ) -{ - int res; - ensure_valgrind("__pthread_kill_other_threads_np"); - VALGRIND_MAGIC_SEQUENCE(res, (-1) /* default */, - VG_USERREQ__NUKE_OTHER_THREADS, - 0, 0, 0, 0); - my_assert(res == 0); -} - - -/* --------------------------------------------------- - SIGNALS - ------------------------------------------------ */ - -#include - -int pthread_sigmask(int how, const sigset_t *newmask, - sigset_t *oldmask) -{ - int res; - - /* A bit subtle, because the scheduler expects newmask and oldmask - to be vki_sigset_t* rather than sigset_t*, and the two are - different. Fortunately the first 64 bits of a sigset_t are - exactly a vki_sigset_t, so we just pass the pointers through - unmodified. Haaaack! - - Also mash the how value so that the SIG_ constants from glibc - constants to VKI_ constants, so that the former do not have to - be included into vg_scheduler.c. */ - - ensure_valgrind("pthread_sigmask"); - - switch (how) { - case SIG_SETMASK: how = VKI_SIG_SETMASK; break; - case SIG_BLOCK: how = VKI_SIG_BLOCK; break; - case SIG_UNBLOCK: how = VKI_SIG_UNBLOCK; break; - default: pthread_error("pthread_sigmask: invalid how"); - return EINVAL; - } - - /* Crude check */ - if (newmask == NULL) - return EFAULT; - - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_SIGMASK, - how, newmask, oldmask, 0); - - /* The scheduler tells us of any memory violations. */ - return res == 0 ? 
0 : EFAULT; -} - - -int sigwait ( const sigset_t* set, int* sig ) -{ - int res; - ensure_valgrind("sigwait"); - /* As with pthread_sigmask we deliberately confuse sigset_t with - vki_ksigset_t. */ - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__SIGWAIT, - set, sig, 0, 0); - return res; -} - - -int pthread_kill(pthread_t thread, int signo) -{ - int res; - ensure_valgrind("pthread_kill"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_KILL, - thread, signo, 0, 0); - return res; -} - - -/* Copied verbatim from Linuxthreads */ -/* Redefine raise() to send signal to calling thread only, - as per POSIX 1003.1c */ -int raise (int sig) -{ - int retcode = pthread_kill(pthread_self(), sig); - if (retcode == 0) { - return 0; - } else { - errno = retcode; - return -1; - } -} - - -int pause ( void ) -{ - unsigned int n_orig, n_now; - struct vki_timespec nanosleep_interval; - ensure_valgrind("pause"); - - /* This is surely a cancellation point. */ - __my_pthread_testcancel(); - - VALGRIND_MAGIC_SEQUENCE(n_orig, 0xFFFFFFFF /* default */, - VG_USERREQ__GET_N_SIGS_RETURNED, - 0, 0, 0, 0); - my_assert(n_orig != 0xFFFFFFFF); - - while (1) { - VALGRIND_MAGIC_SEQUENCE(n_now, 0xFFFFFFFF /* default */, - VG_USERREQ__GET_N_SIGS_RETURNED, - 0, 0, 0, 0); - my_assert(n_now != 0xFFFFFFFF); - my_assert(n_now >= n_orig); - if (n_now != n_orig) break; - - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 52 * 1000 * 1000; /* 52 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. 
*/ - (void)my_do_syscall2(__NR_nanosleep, - (int)(&nanosleep_interval), (int)NULL); - } - - * (__errno_location()) = EINTR; - return -1; -} - - -/* --------------------------------------------------- - THREAD-SPECIFICs - ------------------------------------------------ */ - -int __pthread_key_create(pthread_key_t *key, - void (*destr_function) (void *)) -{ - int res; - ensure_valgrind("pthread_key_create"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_KEY_CREATE, - key, destr_function, 0, 0); - return res; -} - -int pthread_key_delete(pthread_key_t key) -{ - static int moans = N_MOANS; - if (moans-- > 0) - ignored("pthread_key_delete"); - return 0; -} - -int __pthread_setspecific(pthread_key_t key, const void *pointer) -{ - int res; - ensure_valgrind("pthread_setspecific"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_SETSPECIFIC, - key, pointer, 0, 0); - return res; -} - -void * __pthread_getspecific(pthread_key_t key) -{ - int res; - ensure_valgrind("pthread_getspecific"); - VALGRIND_MAGIC_SEQUENCE(res, 0 /* default */, - VG_USERREQ__PTHREAD_GETSPECIFIC, - key, 0 , 0, 0); - return (void*)res; -} - - -/* --------------------------------------------------- - ONCEry - ------------------------------------------------ */ - -static pthread_mutex_t once_masterlock = PTHREAD_MUTEX_INITIALIZER; - - -int __pthread_once ( pthread_once_t *once_control, - void (*init_routine) (void) ) -{ - int res; - ensure_valgrind("pthread_once"); - - res = __pthread_mutex_lock(&once_masterlock); - - if (res != 0) { - barf("pthread_once: Looks like your program's " - "init routine calls back to pthread_once() ?!"); - } - - if (*once_control == 0) { - *once_control = 1; - init_routine(); - } - - __pthread_mutex_unlock(&once_masterlock); - - return 0; -} - - -/* --------------------------------------------------- - MISC - ------------------------------------------------ */ - -static pthread_mutex_t pthread_atfork_lock - = 
PTHREAD_MUTEX_INITIALIZER; - -int __pthread_atfork ( void (*prepare)(void), - void (*parent)(void), - void (*child)(void) ) -{ - int n, res; - ForkHandlerEntry entry; - - ensure_valgrind("pthread_atfork"); - __pthread_mutex_lock(&pthread_atfork_lock); - - /* Fetch old counter */ - VALGRIND_MAGIC_SEQUENCE(n, -2 /* default */, - VG_USERREQ__GET_FHSTACK_USED, - 0, 0, 0, 0); - my_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK); - if (n == VG_N_FORKHANDLERSTACK-1) - barf("pthread_atfork: VG_N_FORKHANDLERSTACK is too low; " - "increase and recompile"); - - /* Add entry */ - entry.prepare = *prepare; - entry.parent = *parent; - entry.child = *child; - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_ENTRY, - n, &entry, 0, 0); - my_assert(res == 0); - - /* Bump counter */ - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_USED, - n+1, 0, 0, 0); - my_assert(res == 0); - - __pthread_mutex_unlock(&pthread_atfork_lock); - return 0; -} - - -__attribute__((weak)) -void __pthread_initialize ( void ) -{ - ensure_valgrind("__pthread_initialize"); -} - - -/* --------------------------------------------------- - LIBRARY-PRIVATE THREAD SPECIFIC STATE - ------------------------------------------------ */ - -#include -static int thread_specific_errno[VG_N_THREADS]; -static int thread_specific_h_errno[VG_N_THREADS]; -static struct __res_state - thread_specific_res_state[VG_N_THREADS]; - -int* __errno_location ( void ) -{ - int tid; - /* ensure_valgrind("__errno_location"); */ - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - /* 'cos I'm paranoid ... */ - if (tid < 1 || tid >= VG_N_THREADS) - barf("__errno_location: invalid ThreadId"); - return & thread_specific_errno[tid]; -} - -int* __h_errno_location ( void ) -{ - int tid; - /* ensure_valgrind("__h_errno_location"); */ - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - /* 'cos I'm paranoid ... 
*/ - if (tid < 1 || tid >= VG_N_THREADS) - barf("__h_errno_location: invalid ThreadId"); - return & thread_specific_h_errno[tid]; -} - -struct __res_state* __res_state ( void ) -{ - int tid; - /* ensure_valgrind("__res_state"); */ - VALGRIND_MAGIC_SEQUENCE(tid, 1 /* default */, - VG_USERREQ__PTHREAD_GET_THREADID, - 0, 0, 0, 0); - /* 'cos I'm paranoid ... */ - if (tid < 1 || tid >= VG_N_THREADS) - barf("__res_state: invalid ThreadId"); - return & thread_specific_res_state[tid]; -} - - -/* --------------------------------------------------- - LIBC-PRIVATE SPECIFIC DATA - ------------------------------------------------ */ - -/* Relies on assumption that initial private data is NULL. This - should be fixed somehow. */ - -/* The allowable keys (indices) (all 2 of them). - From sysdeps/pthread/bits/libc-tsd.h -*/ -#define N_LIBC_TSD_EXTRA_KEYS 1 - -enum __libc_tsd_key_t { _LIBC_TSD_KEY_MALLOC = 0, - _LIBC_TSD_KEY_DL_ERROR, - _LIBC_TSD_KEY_N }; - -/* Auto-initialising subsystem. libc_specifics_inited is set - after initialisation. libc_specifics_inited_mx guards it. */ -static int libc_specifics_inited = 0; -static pthread_mutex_t libc_specifics_inited_mx = PTHREAD_MUTEX_INITIALIZER; - -/* These are the keys we must initialise the first time. */ -static pthread_key_t libc_specifics_keys[_LIBC_TSD_KEY_N - + N_LIBC_TSD_EXTRA_KEYS]; - -/* Initialise the keys, if they are not already initialise. 
*/ -static -void init_libc_tsd_keys ( void ) -{ - int res, i; - pthread_key_t k; - - res = pthread_mutex_lock(&libc_specifics_inited_mx); - if (res != 0) barf("init_libc_tsd_keys: lock"); - - if (libc_specifics_inited == 0) { - /* printf("INIT libc specifics\n"); */ - libc_specifics_inited = 1; - for (i = 0; i < _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS; i++) { - res = pthread_key_create(&k, NULL); - if (res != 0) barf("init_libc_tsd_keys: create"); - libc_specifics_keys[i] = k; - } - } - - res = pthread_mutex_unlock(&libc_specifics_inited_mx); - if (res != 0) barf("init_libc_tsd_keys: unlock"); -} - - -static int -libc_internal_tsd_set ( enum __libc_tsd_key_t key, - const void * pointer ) -{ - int res; - static int moans = N_MOANS; - /* printf("SET SET SET key %d ptr %p\n", key, pointer); */ - if (key < _LIBC_TSD_KEY_MALLOC - || key >= _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS) - barf("libc_internal_tsd_set: invalid key"); - if (key >= _LIBC_TSD_KEY_N && moans-- > 0) - fprintf(stderr, - "valgrind's libpthread.so: libc_internal_tsd_set: " - "dubious key %d\n", key); - init_libc_tsd_keys(); - res = pthread_setspecific(libc_specifics_keys[key], pointer); - if (res != 0) barf("libc_internal_tsd_set: setspecific failed"); - return 0; -} - -static void * -libc_internal_tsd_get ( enum __libc_tsd_key_t key ) -{ - void* v; - static int moans = N_MOANS; - /* printf("GET GET GET key %d\n", key); */ - if (key < _LIBC_TSD_KEY_MALLOC - || key >= _LIBC_TSD_KEY_N + N_LIBC_TSD_EXTRA_KEYS) - barf("libc_internal_tsd_get: invalid key"); - if (key >= _LIBC_TSD_KEY_N && moans-- > 0) - fprintf(stderr, - "valgrind's libpthread.so: libc_internal_tsd_get: " - "dubious key %d\n", key); - init_libc_tsd_keys(); - v = pthread_getspecific(libc_specifics_keys[key]); - /* if (v == NULL) barf("libc_internal_tsd_set: getspecific failed"); */ - return v; -} - - - - -int (*__libc_internal_tsd_set) - (enum __libc_tsd_key_t key, const void * pointer) - = libc_internal_tsd_set; - -void* 
(*__libc_internal_tsd_get) - (enum __libc_tsd_key_t key) - = libc_internal_tsd_get; - - -/* --------------------------------------------------------------------- - These are here (I think) because they are deemed cancellation - points by POSIX. For the moment we'll simply pass the call along - to the corresponding thread-unaware (?) libc routine. - ------------------------------------------------------------------ */ - -#include -#include -#include - -#ifdef GLIBC_2_1 -extern -int __sigaction - (int signum, - const struct sigaction *act, - struct sigaction *oldact); -#else -extern -int __libc_sigaction - (int signum, - const struct sigaction *act, - struct sigaction *oldact); -#endif -int sigaction(int signum, - const struct sigaction *act, - struct sigaction *oldact) -{ - __my_pthread_testcancel(); -# ifdef GLIBC_2_1 - return __sigaction(signum, act, oldact); -# else - return __libc_sigaction(signum, act, oldact); -# endif -} - - -extern -int __libc_connect(int sockfd, - const struct sockaddr *serv_addr, - socklen_t addrlen); -__attribute__((weak)) -int connect(int sockfd, - const struct sockaddr *serv_addr, - socklen_t addrlen) -{ - __my_pthread_testcancel(); - return __libc_connect(sockfd, serv_addr, addrlen); -} - - -extern -int __libc_fcntl(int fd, int cmd, long arg); -__attribute__((weak)) -int fcntl(int fd, int cmd, long arg) -{ - __my_pthread_testcancel(); - return __libc_fcntl(fd, cmd, arg); -} - - -extern -ssize_t __libc_write(int fd, const void *buf, size_t count); -__attribute__((weak)) -ssize_t write(int fd, const void *buf, size_t count) -{ - __my_pthread_testcancel(); - return __libc_write(fd, buf, count); -} - - -extern -ssize_t __libc_read(int fd, void *buf, size_t count); -__attribute__((weak)) -ssize_t read(int fd, void *buf, size_t count) -{ - __my_pthread_testcancel(); - return __libc_read(fd, buf, count); -} - - -extern -int __libc_open64(const char *pathname, int flags, mode_t mode); -__attribute__((weak)) -int open64(const char *pathname, 
int flags, mode_t mode) -{ - __my_pthread_testcancel(); - return __libc_open64(pathname, flags, mode); -} - - -extern -int __libc_open(const char *pathname, int flags, mode_t mode); -__attribute__((weak)) -int open(const char *pathname, int flags, mode_t mode) -{ - __my_pthread_testcancel(); - return __libc_open(pathname, flags, mode); -} - - -extern -int __libc_close(int fd); -__attribute__((weak)) -int close(int fd) -{ - __my_pthread_testcancel(); - return __libc_close(fd); -} - - -extern -int __libc_accept(int s, struct sockaddr *addr, socklen_t *addrlen); -__attribute__((weak)) -int accept(int s, struct sockaddr *addr, socklen_t *addrlen) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_accept(s, addr, addrlen); -} - - -extern -pid_t __libc_waitpid(pid_t pid, int *status, int options); -__attribute__((weak)) -pid_t waitpid(pid_t pid, int *status, int options) -{ - __my_pthread_testcancel(); - return __libc_waitpid(pid, status, options); -} - - -extern -int __libc_nanosleep(const struct timespec *req, struct timespec *rem); -__attribute__((weak)) -int nanosleep(const struct timespec *req, struct timespec *rem) -{ - __my_pthread_testcancel(); - return __libc_nanosleep(req, rem); -} - - -extern -int __libc_fsync(int fd); -__attribute__((weak)) -int fsync(int fd) -{ - __my_pthread_testcancel(); - return __libc_fsync(fd); -} - - -extern -off_t __libc_lseek(int fildes, off_t offset, int whence); -__attribute__((weak)) -off_t lseek(int fildes, off_t offset, int whence) -{ - __my_pthread_testcancel(); - return __libc_lseek(fildes, offset, whence); -} - - -extern -__off64_t __libc_lseek64(int fildes, __off64_t offset, int whence); -__attribute__((weak)) -__off64_t lseek64(int fildes, __off64_t offset, int whence) -{ - __my_pthread_testcancel(); - return __libc_lseek64(fildes, offset, whence); -} - - -extern -ssize_t __libc_pread64 (int __fd, void *__buf, size_t __nbytes, - __off64_t __offset); 
-ssize_t __pread64 (int __fd, void *__buf, size_t __nbytes, - __off64_t __offset) -{ - __my_pthread_testcancel(); - return __libc_pread64(__fd, __buf, __nbytes, __offset); -} - - -extern -ssize_t __libc_pwrite64 (int __fd, const void *__buf, size_t __nbytes, - __off64_t __offset); -ssize_t __pwrite64 (int __fd, const void *__buf, size_t __nbytes, - __off64_t __offset) -{ - __my_pthread_testcancel(); - return __libc_pwrite64(__fd, __buf, __nbytes, __offset); -} - - -extern -ssize_t __libc_pwrite(int fd, const void *buf, size_t count, off_t offset); -__attribute__((weak)) -ssize_t pwrite(int fd, const void *buf, size_t count, off_t offset) -{ - __my_pthread_testcancel(); - return __libc_pwrite(fd, buf, count, offset); -} - - -extern -ssize_t __libc_pread(int fd, void *buf, size_t count, off_t offset); -__attribute__((weak)) -ssize_t pread(int fd, void *buf, size_t count, off_t offset) -{ - __my_pthread_testcancel(); - return __libc_pread(fd, buf, count, offset); -} - - -extern -void __libc_longjmp(jmp_buf env, int val) __attribute((noreturn)); -/* not weak: __attribute__((weak)) */ -void longjmp(jmp_buf env, int val) -{ - __libc_longjmp(env, val); -} - - -extern void __libc_siglongjmp (sigjmp_buf env, int val) - __attribute__ ((noreturn)); -void siglongjmp(sigjmp_buf env, int val) -{ - kludged("siglongjmp (cleanup handlers are ignored)"); - __libc_siglongjmp(env, val); -} - - -extern -int __libc_send(int s, const void *msg, size_t len, int flags); -__attribute__((weak)) -int send(int s, const void *msg, size_t len, int flags) -{ - __my_pthread_testcancel(); - return __libc_send(s, msg, len, flags); -} - - -extern -int __libc_recv(int s, void *buf, size_t len, int flags); -__attribute__((weak)) -int recv(int s, void *buf, size_t len, int flags) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_recv(s, buf, len, flags); -} - - -extern -int __libc_sendmsg(int s, const struct msghdr *msg, int flags); 
-__attribute__((weak)) -int sendmsg(int s, const struct msghdr *msg, int flags) -{ - __my_pthread_testcancel(); - return __libc_sendmsg(s, msg, flags); -} - - -extern -int __libc_recvmsg(int s, struct msghdr *msg, int flags); -__attribute__((weak)) -int recvmsg(int s, struct msghdr *msg, int flags) -{ - __my_pthread_testcancel(); - return __libc_recvmsg(s, msg, flags); -} - - -extern -int __libc_recvfrom(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen); -__attribute__((weak)) -int recvfrom(int s, void *buf, size_t len, int flags, - struct sockaddr *from, socklen_t *fromlen) -{ - __my_pthread_testcancel(); - wait_for_fd_to_be_readable_or_erring(s); - __my_pthread_testcancel(); - return __libc_recvfrom(s, buf, len, flags, from, fromlen); -} - - -extern -int __libc_sendto(int s, const void *msg, size_t len, int flags, - const struct sockaddr *to, socklen_t tolen); -__attribute__((weak)) -int sendto(int s, const void *msg, size_t len, int flags, - const struct sockaddr *to, socklen_t tolen) -{ - __my_pthread_testcancel(); - return __libc_sendto(s, msg, len, flags, to, tolen); -} - - -extern -int __libc_system(const char* str); -__attribute__((weak)) -int system(const char* str) -{ - __my_pthread_testcancel(); - return __libc_system(str); -} - - -extern -pid_t __libc_wait(int *status); -__attribute__((weak)) -pid_t wait(int *status) -{ - __my_pthread_testcancel(); - return __libc_wait(status); -} - - -extern -int __libc_msync(const void *start, size_t length, int flags); -__attribute__((weak)) -int msync(const void *start, size_t length, int flags) -{ - __my_pthread_testcancel(); - return __libc_msync(start, length, flags); -} - - -/*--- fork and its helper ---*/ - -static -void run_fork_handlers ( int what ) -{ - ForkHandlerEntry entry; - int n_h, n_handlers, i, res; - - my_assert(what == 0 || what == 1 || what == 2); - - /* Fetch old counter */ - VALGRIND_MAGIC_SEQUENCE(n_handlers, -2 /* default */, - VG_USERREQ__GET_FHSTACK_USED, 
- 0, 0, 0, 0); - my_assert(n_handlers >= 0 && n_handlers < VG_N_FORKHANDLERSTACK); - - /* Prepare handlers (what == 0) are called in opposite order of - calls to pthread_atfork. Parent and child handlers are called - in the same order as calls to pthread_atfork. */ - if (what == 0) - n_h = n_handlers - 1; - else - n_h = 0; - - for (i = 0; i < n_handlers; i++) { - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__GET_FHSTACK_ENTRY, - n_h, &entry, 0, 0); - my_assert(res == 0); - switch (what) { - case 0: if (entry.prepare) entry.prepare(); - n_h--; break; - case 1: if (entry.parent) entry.parent(); - n_h++; break; - case 2: if (entry.child) entry.child(); - n_h++; break; - default: barf("run_fork_handlers: invalid what"); - } - } - - if (what != 0 /* prepare */) { - /* Empty out the stack. */ - VALGRIND_MAGIC_SEQUENCE(res, -2 /* default */, - VG_USERREQ__SET_FHSTACK_USED, - 0, 0, 0, 0); - my_assert(res == 0); - } -} - -extern -pid_t __libc_fork(void); -pid_t __fork(void) -{ - pid_t pid; - __my_pthread_testcancel(); - __pthread_mutex_lock(&pthread_atfork_lock); - - run_fork_handlers(0 /* prepare */); - pid = __libc_fork(); - if (pid == 0) { - /* I am the child */ - run_fork_handlers(2 /* child */); - __pthread_mutex_init(&pthread_atfork_lock, NULL); - } else { - /* I am the parent */ - run_fork_handlers(1 /* parent */); - __pthread_mutex_unlock(&pthread_atfork_lock); - } - return pid; -} - - - - -/* --------------------------------------------------------------------- - Nonblocking implementations of select() and poll(). This stuff will - surely rot your mind. 
- ------------------------------------------------------------------ */ - -/*--------------------------------------------------*/ - -#include "vg_kerneliface.h" - -static -__inline__ -int is_kerror ( int res ) -{ - if (res >= -4095 && res <= -1) - return 1; - else - return 0; -} - - -static -int my_do_syscall1 ( int syscallno, int arg1 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "d" (arg1) ); - return __res; -} - -static -int my_do_syscall2 ( int syscallno, - int arg1, int arg2 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%edx,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "d" (arg1), - "c" (arg2) ); - return __res; -} - -static -int my_do_syscall3 ( int syscallno, - int arg1, int arg2, int arg3 ) -{ - int __res; - __asm__ volatile ("pushl %%ebx; movl %%esi,%%ebx ; int $0x80 ; popl %%ebx" - : "=a" (__res) - : "0" (syscallno), - "S" (arg1), - "c" (arg2), - "d" (arg3) ); - return __res; -} - -static -int do_syscall_select( int n, - vki_fd_set* readfds, - vki_fd_set* writefds, - vki_fd_set* exceptfds, - struct vki_timeval * timeout ) -{ - int res; - int args[5]; - args[0] = n; - args[1] = (int)readfds; - args[2] = (int)writefds; - args[3] = (int)exceptfds; - args[4] = (int)timeout; - res = my_do_syscall1(__NR_select, (int)(&(args[0])) ); - return res; -} - - -/* This is a wrapper round select(), which makes it thread-safe, - meaning that only this thread will block, rather than the entire - process. This wrapper in turn depends on nanosleep() not to block - the entire process, but I think (hope? suspect?) that POSIX - pthreads guarantees that to be the case. - - Basic idea is: modify the timeout parameter to select so that it - returns immediately. Poll like this until select returns non-zero, - indicating something interesting happened, or until our time is up. 
- Space out the polls with nanosleeps of say 20 milliseconds, which - is required to be nonblocking; this allows other threads to run. - - Assumes: - * (checked via my_assert) types fd_set and vki_fd_set are identical. - * (checked via my_assert) types timeval and vki_timeval are identical. - * (unchecked) libc error numbers (EINTR etc) are the negation of the - kernel's error numbers (VKI_EINTR etc). -*/ - -/* __attribute__((weak)) */ -int select ( int n, - fd_set *rfds, - fd_set *wfds, - fd_set *xfds, - struct timeval *timeout ) -{ - unsigned int ms_now, ms_end; - int res; - fd_set rfds_copy; - fd_set wfds_copy; - fd_set xfds_copy; - struct vki_timeval t_now; - struct vki_timeval zero_timeout; - struct vki_timespec nanosleep_interval; - - __my_pthread_testcancel(); - - /* gcc's complains about ms_end being used uninitialised -- classic - case it can't understand, where ms_end is both defined and used - only if timeout != NULL. Hence ... */ - ms_end = 0; - - /* We assume that the kernel and libc data layouts are identical - for the following types. These asserts provide a crude - check. */ - if (sizeof(fd_set) != sizeof(vki_fd_set) - || sizeof(struct timeval) != sizeof(struct vki_timeval)) - barf("valgrind's hacky non-blocking select(): data sizes error"); - - /* Detect the current time and simultaneously find out if we are - running on Valgrind. */ - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - - /* If a zero timeout specified, this call is harmless. Also go - this route if we're not running on Valgrind, for whatever - reason. 
*/ - if ( (timeout && timeout->tv_sec == 0 && timeout->tv_usec == 0) - || (ms_now == 0xFFFFFFFF) ) { - res = do_syscall_select( n, (vki_fd_set*)rfds, - (vki_fd_set*)wfds, - (vki_fd_set*)xfds, - (struct vki_timeval*)timeout); - if (is_kerror(res)) { - * (__errno_location()) = -res; - return -1; - } else { - return res; - } - } - - /* If a timeout was specified, set ms_end to be the end millisecond - counter [wallclock] time. */ - if (timeout) { - res = my_do_syscall2(__NR_gettimeofday, (int)&t_now, (int)NULL); - my_assert(res == 0); - ms_end = ms_now; - ms_end += (timeout->tv_usec / 1000); - ms_end += (timeout->tv_sec * 1000); - /* Stay sane ... */ - my_assert (ms_end >= ms_now); - } - - /* fprintf(stderr, "MY_SELECT: before loop\n"); */ - - /* Either timeout == NULL, meaning wait indefinitely, or timeout != - NULL, in which case ms_end holds the end time. */ - - while (1) { - - /* First, do a return-immediately select(). */ - - /* These could be trashed each time round the loop, so restore - them each time. */ - if (rfds) rfds_copy = *rfds; - if (wfds) wfds_copy = *wfds; - if (xfds) xfds_copy = *xfds; - - zero_timeout.tv_sec = zero_timeout.tv_usec = 0; - - res = do_syscall_select( n, - rfds ? (vki_fd_set*)(&rfds_copy) : NULL, - wfds ? (vki_fd_set*)(&wfds_copy) : NULL, - xfds ? (vki_fd_set*)(&xfds_copy) : NULL, - & zero_timeout ); - if (is_kerror(res)) { - /* Some kind of error (including EINTR). Set errno and - return. The sets are unspecified in this case. */ - * (__errno_location()) = -res; - return -1; - } - if (res > 0) { - /* one or more fds is ready. Copy out resulting sets and - return. */ - if (rfds) *rfds = rfds_copy; - if (wfds) *wfds = wfds_copy; - if (xfds) *xfds = xfds_copy; - return res; - } - - /* Nothing interesting happened, so we go to sleep for a - while. 
*/ - - /* fprintf(stderr, "MY_SELECT: nanosleep\n"); */ - /* nanosleep and go round again */ - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 50 * 1000 * 1000; /* 50 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. */ - res = my_do_syscall2(__NR_nanosleep, - (int)(&nanosleep_interval), (int)NULL); - if (res == -VKI_EINTR) { - /* The nanosleep was interrupted by a signal. So we do the - same. */ - * (__errno_location()) = EINTR; - return -1; - } - - /* Sleeping finished. If a finite timeout, check to see if it - has expired yet. */ - if (timeout) { - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - if (ms_now >= ms_end) { - /* timeout; nothing interesting happened. */ - if (rfds) FD_ZERO(rfds); - if (wfds) FD_ZERO(wfds); - if (xfds) FD_ZERO(xfds); - return 0; - } - } - - } -} - - - - -#include - -#ifndef HAVE_NFDS_T -typedef unsigned long int nfds_t; -#endif - - -/* __attribute__((weak)) */ -int poll (struct pollfd *__fds, nfds_t __nfds, int __timeout) -{ - unsigned int ms_now, ms_end; - int res, i; - struct vki_timespec nanosleep_interval; - - __my_pthread_testcancel(); - ensure_valgrind("poll"); - - /* Detect the current time and simultaneously find out if we are - running on Valgrind. */ - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - - if (/* CHECK SIZES FOR struct pollfd */ - sizeof(struct timeval) != sizeof(struct vki_timeval)) - barf("valgrind's hacky non-blocking poll(): data sizes error"); - - /* dummy initialisation to keep gcc -Wall happy */ - ms_end = 0; - - /* If a zero timeout specified, this call is harmless. Also do - this if not running on Valgrind. 
*/ - if (__timeout == 0 || ms_now == 0xFFFFFFFF) { - res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, __timeout); - if (is_kerror(res)) { - * (__errno_location()) = -res; - return -1; - } else { - return res; - } - } - - /* If a timeout was specified, set ms_end to be the end wallclock - time. Easy considering that __timeout is in milliseconds. */ - if (__timeout > 0) { - ms_end = ms_now + (unsigned int)__timeout; - } - - /* fprintf(stderr, "MY_POLL: before loop\n"); */ - - /* Either timeout < 0, meaning wait indefinitely, or timeout > 0, - in which case t_end holds the end time. */ - - my_assert(__timeout != 0); - - while (1) { - - /* Do a return-immediately poll. */ - - res = my_do_syscall3(__NR_poll, (int)__fds, __nfds, 0 ); - if (is_kerror(res)) { - /* Some kind of error. Set errno and return. */ - * (__errno_location()) = -res; - return -1; - } - if (res > 0) { - /* One or more fds is ready. Return now. */ - return res; - } - - /* Nothing interesting happened, so we go to sleep for a - while. */ - - /* fprintf(stderr, "MY_POLL: nanosleep\n"); */ - /* nanosleep and go round again */ - nanosleep_interval.tv_sec = 0; - nanosleep_interval.tv_nsec = 51 * 1000 * 1000; /* 51 milliseconds */ - /* It's critical here that valgrind's nanosleep implementation - is nonblocking. */ - (void)my_do_syscall2(__NR_nanosleep, - (int)(&nanosleep_interval), (int)NULL); - - /* Sleeping finished. If a finite timeout, check to see if it - has expired yet. */ - if (__timeout > 0) { - VALGRIND_MAGIC_SEQUENCE(ms_now, 0xFFFFFFFF /* default */, - VG_USERREQ__READ_MILLISECOND_TIMER, - 0, 0, 0, 0); - my_assert(ms_now != 0xFFFFFFFF); - if (ms_now >= ms_end) { - /* timeout; nothing interesting happened. */ - for (i = 0; i < __nfds; i++) - __fds[i].revents = 0; - return 0; - } - } - - } -} - - -/* Helper function used to make accept() non-blocking. Idea is to use - the above nonblocking poll() to make this thread ONLY wait for the - specified fd to become ready, and then return. 
*/ - -/* Sigh -- a hack. We're not supposed to include this file directly; - should do it via /usr/include/fcntl.h, but that introduces a - varargs prototype for fcntl itself, which we can't mimic. */ -#define _FCNTL_H -#include - -static void wait_for_fd_to_be_readable_or_erring ( int fd ) -{ - struct pollfd pfd; - int res; - - /* fprintf(stderr, "wait_for_fd_to_be_readable_or_erring %d\n", fd); */ - - /* First check to see if the fd is nonblocking, and/or invalid. In - either case return immediately. */ - res = __libc_fcntl(fd, F_GETFL, 0); - if (res == -1) return; /* fd is invalid somehow */ - if (res & O_NONBLOCK) return; /* fd is nonblocking */ - - /* Ok, we'd better wait with poll. */ - pfd.fd = fd; - pfd.events = POLLIN | POLLPRI | POLLERR | POLLHUP | POLLNVAL; - /* ... but not POLLOUT, you may notice. */ - pfd.revents = 0; - (void)poll(&pfd, 1, -1 /* forever */); -} - - -/* --------------------------------------------------------------------- - Hacky implementation of semaphores. - ------------------------------------------------------------------ */ - -#include - -/* This is a terrible way to do the remapping. Plan is to import an - AVL tree at some point. */ - -typedef - struct { - pthread_mutex_t se_mx; - pthread_cond_t se_cv; - int count; - } - vg_sem_t; - -static pthread_mutex_t se_remap_mx = PTHREAD_MUTEX_INITIALIZER; - -static int se_remap_used = 0; -static sem_t* se_remap_orig[VG_N_SEMAPHORES]; -static vg_sem_t se_remap_new[VG_N_SEMAPHORES]; - -static vg_sem_t* se_remap ( sem_t* orig ) -{ - int res, i; - res = __pthread_mutex_lock(&se_remap_mx); - my_assert(res == 0); - - for (i = 0; i < se_remap_used; i++) { - if (se_remap_orig[i] == orig) - break; - } - if (i == se_remap_used) { - if (se_remap_used == VG_N_SEMAPHORES) { - res = pthread_mutex_unlock(&se_remap_mx); - my_assert(res == 0); - barf("VG_N_SEMAPHORES is too low. 
Increase and recompile."); - } - se_remap_used++; - se_remap_orig[i] = orig; - /* printf("allocated semaphore %d\n", i); */ - } - res = __pthread_mutex_unlock(&se_remap_mx); - my_assert(res == 0); - return &se_remap_new[i]; -} - - -int sem_init(sem_t *sem, int pshared, unsigned int value) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_init"); - if (pshared != 0) { - pthread_error("sem_init: unsupported pshared value"); - errno = ENOSYS; - return -1; - } - vg_sem = se_remap(sem); - res = pthread_mutex_init(&vg_sem->se_mx, NULL); - my_assert(res == 0); - res = pthread_cond_init(&vg_sem->se_cv, NULL); - my_assert(res == 0); - vg_sem->count = value; - return 0; -} - - -int sem_wait ( sem_t* sem ) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_wait"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - while (vg_sem->count == 0) { - res = pthread_cond_wait(&vg_sem->se_cv, &vg_sem->se_mx); - my_assert(res == 0); - } - vg_sem->count--; - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return 0; -} - -int sem_post ( sem_t* sem ) -{ - int res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_post"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - if (vg_sem->count == 0) { - vg_sem->count++; - res = pthread_cond_broadcast(&vg_sem->se_cv); - my_assert(res == 0); - } else { - vg_sem->count++; - } - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return 0; -} - - -int sem_trywait ( sem_t* sem ) -{ - int ret, res; - vg_sem_t* vg_sem; - ensure_valgrind("sem_trywait"); - vg_sem = se_remap(sem); - res = __pthread_mutex_lock(&vg_sem->se_mx); - my_assert(res == 0); - if (vg_sem->count > 0) { - vg_sem->count--; - ret = 0; - } else { - ret = -1; - errno = EAGAIN; - } - res = __pthread_mutex_unlock(&vg_sem->se_mx); - my_assert(res == 0); - return ret; -} - - -int sem_getvalue(sem_t* sem, int * sval) -{ - vg_sem_t* vg_sem; - 
ensure_valgrind("sem_trywait"); - vg_sem = se_remap(sem); - *sval = vg_sem->count; - return 0; -} - - -int sem_destroy(sem_t * sem) -{ - kludged("sem_destroy"); - /* if someone waiting on this semaphore, errno = EBUSY, return -1 */ - return 0; -} - - -/* --------------------------------------------------------------------- - Reader-writer locks. - ------------------------------------------------------------------ */ - -typedef - struct { - int initted; /* != 0 --> in use; sanity check only */ - int prefer_w; /* != 0 --> prefer writer */ - int nwait_r; /* # of waiting readers */ - int nwait_w; /* # of waiting writers */ - pthread_cond_t cv_r; /* for signalling readers */ - pthread_cond_t cv_w; /* for signalling writers */ - pthread_mutex_t mx; - int status; - /* allowed range for status: >= -1. -1 means 1 writer currently - active, >= 0 means N readers currently active. */ - } - vg_rwlock_t; - - -static pthread_mutex_t rw_remap_mx = PTHREAD_MUTEX_INITIALIZER; - -static int rw_remap_used = 0; -static pthread_rwlock_t* rw_remap_orig[VG_N_RWLOCKS]; -static vg_rwlock_t rw_remap_new[VG_N_RWLOCKS]; - - -static -void init_vg_rwlock ( vg_rwlock_t* vg_rwl ) -{ - int res = 0; - vg_rwl->initted = 1; - vg_rwl->prefer_w = 1; - vg_rwl->nwait_r = 0; - vg_rwl->nwait_w = 0; - vg_rwl->status = 0; - res = pthread_mutex_init(&vg_rwl->mx, NULL); - res |= pthread_cond_init(&vg_rwl->cv_r, NULL); - res |= pthread_cond_init(&vg_rwl->cv_w, NULL); - my_assert(res == 0); -} - - -/* Take the address of a LinuxThreads rwlock_t and return the shadow - address of our version. Further, if the LinuxThreads version - appears to have been statically initialised, do the same to the one - we allocate here. The pthread_rwlock_t.__rw_readers field is set - to zero by PTHREAD_RWLOCK_INITIALIZER, so we take zero as meaning - uninitialised and non-zero meaning initialised. 
-*/ -static vg_rwlock_t* rw_remap ( pthread_rwlock_t* orig ) -{ - int res, i; - vg_rwlock_t* vg_rwl; - res = __pthread_mutex_lock(&rw_remap_mx); - my_assert(res == 0); - - for (i = 0; i < rw_remap_used; i++) { - if (rw_remap_orig[i] == orig) - break; - } - if (i == rw_remap_used) { - if (rw_remap_used == VG_N_RWLOCKS) { - res = __pthread_mutex_unlock(&rw_remap_mx); - my_assert(res == 0); - barf("VG_N_RWLOCKS is too low. Increase and recompile."); - } - rw_remap_used++; - rw_remap_orig[i] = orig; - rw_remap_new[i].initted = 0; - if (0) printf("allocated rwlock %d\n", i); - } - res = __pthread_mutex_unlock(&rw_remap_mx); - my_assert(res == 0); - vg_rwl = &rw_remap_new[i]; - - /* Initialise the shadow, if required. */ - if (orig->__rw_readers == 0) { - orig->__rw_readers = 1; - init_vg_rwlock(vg_rwl); - if (orig->__rw_kind == PTHREAD_RWLOCK_PREFER_READER_NP) - vg_rwl->prefer_w = 0; - } - - return vg_rwl; -} - - -int pthread_rwlock_init ( pthread_rwlock_t* orig, - const pthread_rwlockattr_t* attr ) -{ - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_init\n"); - /* Force the remapper to initialise the shadow. */ - orig->__rw_readers = 0; - /* Install the lock preference; the remapper needs to know it. 
*/ - orig->__rw_kind = PTHREAD_RWLOCK_DEFAULT_NP; - if (attr) - orig->__rw_kind = attr->__lockkind; - rwl = rw_remap ( orig ); - return 0; -} - - -static -void pthread_rwlock_rdlock_CANCEL_HDLR ( void* rwl_v ) -{ - vg_rwlock_t* rwl = (vg_rwlock_t*)rwl_v; - rwl->nwait_r--; - pthread_mutex_unlock (&rwl->mx); -} - - -int pthread_rwlock_rdlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_rdlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status < 0) { - my_assert(rwl->status == -1); - rwl->nwait_r++; - pthread_cleanup_push( pthread_rwlock_rdlock_CANCEL_HDLR, rwl ); - while (1) { - if (rwl->status == 0) break; - res = pthread_cond_wait(&rwl->cv_r, &rwl->mx); - my_assert(res == 0); - } - pthread_cleanup_pop(0); - rwl->nwait_r--; - } - my_assert(rwl->status >= 0); - rwl->status++; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_tryrdlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_tryrdlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status == -1) { - /* Writer active; we have to give up. 
*/ - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - /* Success */ - my_assert(rwl->status >= 0); - rwl->status++; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -static -void pthread_rwlock_wrlock_CANCEL_HDLR ( void* rwl_v ) -{ - vg_rwlock_t* rwl = (vg_rwlock_t*)rwl_v; - rwl->nwait_w--; - pthread_mutex_unlock (&rwl->mx); -} - - -int pthread_rwlock_wrlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_wrlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0) { - rwl->nwait_w++; - pthread_cleanup_push( pthread_rwlock_wrlock_CANCEL_HDLR, rwl ); - while (1) { - if (rwl->status == 0) break; - res = pthread_cond_wait(&rwl->cv_w, &rwl->mx); - my_assert(res == 0); - } - pthread_cleanup_pop(0); - rwl->nwait_w--; - } - my_assert(rwl->status == 0); - rwl->status = -1; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_trywrlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_wrlock_trywrlock\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0) { - /* Reader(s) or a writer active; we have to give up. 
*/ - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - /* Success */ - my_assert(rwl->status == 0); - rwl->status = -1; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_unlock ( pthread_rwlock_t* orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_unlock\n"); - rwl = rw_remap ( orig ); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status == 0) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EPERM; - } - my_assert(rwl->status != 0); - if (rwl->status == -1) { - rwl->status = 0; - } else { - my_assert(rwl->status > 0); - rwl->status--; - } - - my_assert(rwl->status >= 0); - - if (rwl->prefer_w) { - - /* Favour waiting writers, if any. */ - if (rwl->nwait_w > 0) { - /* Writer(s) are waiting. */ - if (rwl->status == 0) { - /* We can let a writer in. */ - res = pthread_cond_signal(&rwl->cv_w); - my_assert(res == 0); - } else { - /* There are still readers active. Do nothing; eventually - they will disappear, at which point a writer will be - admitted. */ - } - } - else - /* No waiting writers. */ - if (rwl->nwait_r > 0) { - /* Let in a waiting reader. */ - res = pthread_cond_signal(&rwl->cv_r); - my_assert(res == 0); - } - - } else { - - /* Favour waiting readers, if any. */ - if (rwl->nwait_r > 0) { - /* Reader(s) are waiting; let one in. */ - res = pthread_cond_signal(&rwl->cv_r); - my_assert(res == 0); - } - else - /* No waiting readers. */ - if (rwl->nwait_w > 0 && rwl->status == 0) { - /* We have waiting writers and no active readers; let a - writer in. 
*/ - res = pthread_cond_signal(&rwl->cv_w); - my_assert(res == 0); - } - } - - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -int pthread_rwlock_destroy ( pthread_rwlock_t *orig ) -{ - int res; - vg_rwlock_t* rwl; - if (0) printf ("pthread_rwlock_destroy\n"); - rwl = rw_remap ( orig ); - res = __pthread_mutex_lock(&rwl->mx); - my_assert(res == 0); - if (!rwl->initted) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EINVAL; - } - if (rwl->status != 0 || rwl->nwait_r > 0 || rwl->nwait_w > 0) { - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return EBUSY; - } - rwl->initted = 0; - res = __pthread_mutex_unlock(&rwl->mx); - my_assert(res == 0); - return 0; -} - - -/* Copied directly from LinuxThreads. */ -int -pthread_rwlockattr_init (pthread_rwlockattr_t *attr) -{ - attr->__lockkind = 0; - attr->__pshared = PTHREAD_PROCESS_PRIVATE; - - return 0; -} - -/* Copied directly from LinuxThreads. */ -int -pthread_rwlockattr_setpshared (pthread_rwlockattr_t *attr, int pshared) -{ - if (pshared != PTHREAD_PROCESS_PRIVATE && pshared != PTHREAD_PROCESS_SHARED) - return EINVAL; - - /* For now it is not possible to shared a conditional variable. */ - if (pshared != PTHREAD_PROCESS_PRIVATE) - return ENOSYS; - - attr->__pshared = pshared; - - return 0; -} - - -/* --------------------------------------------------------------------- - B'stard. 
- ------------------------------------------------------------------ */ - -# define strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); - -# define weak_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); - -strong_alias(__pthread_mutex_lock, pthread_mutex_lock) -strong_alias(__pthread_mutex_trylock, pthread_mutex_trylock) -strong_alias(__pthread_mutex_unlock, pthread_mutex_unlock) -strong_alias(__pthread_mutexattr_init, pthread_mutexattr_init) - weak_alias(__pthread_mutexattr_settype, pthread_mutexattr_settype) -strong_alias(__pthread_mutex_init, pthread_mutex_init) -strong_alias(__pthread_mutexattr_destroy, pthread_mutexattr_destroy) -strong_alias(__pthread_mutex_destroy, pthread_mutex_destroy) -strong_alias(__pthread_once, pthread_once) -strong_alias(__pthread_atfork, pthread_atfork) -strong_alias(__pthread_key_create, pthread_key_create) -strong_alias(__pthread_getspecific, pthread_getspecific) -strong_alias(__pthread_setspecific, pthread_setspecific) - -#ifndef GLIBC_2_1 -strong_alias(sigaction, __sigaction) -#endif - -strong_alias(close, __close) -strong_alias(fcntl, __fcntl) -strong_alias(lseek, __lseek) -strong_alias(open, __open) -strong_alias(open64, __open64) -strong_alias(read, __read) -strong_alias(wait, __wait) -strong_alias(write, __write) -strong_alias(connect, __connect) -strong_alias(send, __send) - -weak_alias (__pread64, pread64) -weak_alias (__pwrite64, pwrite64) -weak_alias(__fork, fork) - -weak_alias (__pthread_kill_other_threads_np, pthread_kill_other_threads_np) - -/*--------------------------------------------------*/ - -weak_alias(pthread_rwlock_rdlock, __pthread_rwlock_rdlock) -weak_alias(pthread_rwlock_unlock, __pthread_rwlock_unlock) -weak_alias(pthread_rwlock_wrlock, __pthread_rwlock_wrlock) - -weak_alias(pthread_rwlock_destroy, __pthread_rwlock_destroy) -weak_alias(pthread_rwlock_init, __pthread_rwlock_init) 
-weak_alias(pthread_rwlock_tryrdlock, __pthread_rwlock_tryrdlock) -weak_alias(pthread_rwlock_trywrlock, __pthread_rwlock_trywrlock) - - -/* I've no idea what these are, but they get called quite a lot. - Anybody know? */ - -#undef _IO_flockfile -void _IO_flockfile ( _IO_FILE * file ) -{ - pthread_mutex_lock(file->_lock); -} -weak_alias(_IO_flockfile, flockfile); - - -#undef _IO_funlockfile -void _IO_funlockfile ( _IO_FILE * file ) -{ - pthread_mutex_unlock(file->_lock); -} -weak_alias(_IO_funlockfile, funlockfile); - - -/* This doesn't seem to be needed to simulate libpthread.so's external - interface, but many people complain about its absence. */ - -strong_alias(__pthread_mutexattr_settype, __pthread_mutexattr_setkind_np) -weak_alias(__pthread_mutexattr_setkind_np, pthread_mutexattr_setkind_np) - - -/*--------------------------------------------------------------------*/ -/*--- end vg_libpthread.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_libpthread.vs b/coregrind/vg_libpthread.vs deleted file mode 100644 index 69efdcc363..0000000000 --- a/coregrind/vg_libpthread.vs +++ /dev/null @@ -1,19 +0,0 @@ - -GLIBC_2.0 { -}; - -GLIBC_2.1 { -} GLIBC_2.0; - -GLIBC_2.2 { -} GLIBC_2.1; - -GLIBC_2.2.3 { - __pthread_clock_gettime; - __pthread_clock_settime; -} GLIBC_2.2; - -GLIBC_PRIVATE { - __pthread_clock_gettime; - __pthread_clock_settime; -}; diff --git a/coregrind/vg_libpthread_unimp.c b/coregrind/vg_libpthread_unimp.c deleted file mode 100644 index f413887f27..0000000000 --- a/coregrind/vg_libpthread_unimp.c +++ /dev/null @@ -1,262 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Give dummy bindings for everything the real libpthread.so ---*/ -/*--- binds. 
vg_libpthread_unimp.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -/* --------------------------------------------------------------------- - ALL THIS CODE RUNS ON THE SIMULATED CPU. - Give a binding for everything the real libpthread.so binds. 
- ------------------------------------------------------------------ */ - -extern void vgPlain_unimp ( char* ); -#define unimp(str) vgPlain_unimp(str) - -//void _IO_flockfile ( void ) { unimp("_IO_flockfile"); } -void _IO_ftrylockfile ( void ) { unimp("_IO_ftrylockfile"); } -//void _IO_funlockfile ( void ) { unimp("_IO_funlockfile"); } -//void __close ( void ) { unimp("__close"); } -//void __connect ( void ) { unimp("__connect"); } -//void __errno_location ( void ) { unimp("__errno_location"); } -//void __fcntl ( void ) { unimp("__fcntl"); } -//void __fork ( void ) { unimp("__fork"); } -//void __h_errno_location ( void ) { unimp("__h_errno_location"); } -void __libc_allocate_rtsig ( void ) { unimp("__libc_allocate_rtsig"); } -void __libc_current_sigrtmax ( void ) { unimp("__libc_current_sigrtmax"); } -void __libc_current_sigrtmin ( void ) { unimp("__libc_current_sigrtmin"); } -//void __lseek ( void ) { unimp("__lseek"); } -//void __open ( void ) { unimp("__open"); } -//void __open64 ( void ) { unimp("__open64"); } -//void __pread64 ( void ) { unimp("__pread64"); } -//void __pthread_atfork ( void ) { unimp("__pthread_atfork"); } -//void __pthread_getspecific ( void ) { unimp("__pthread_getspecific"); } -//void __pthread_key_create ( void ) { unimp("__pthread_key_create"); } -//void __pthread_kill_other_threads_np ( void ) { unimp("__pthread_kill_other_threads_np"); } -//void __pthread_mutex_destroy ( void ) { unimp("__pthread_mutex_destroy"); } -//void __pthread_mutex_init ( void ) { unimp("__pthread_mutex_init"); } -//void __pthread_mutex_lock ( void ) { unimp("__pthread_mutex_lock"); } -//void __pthread_mutex_trylock ( void ) { unimp("__pthread_mutex_trylock"); } -//void __pthread_mutex_unlock ( void ) { unimp("__pthread_mutex_unlock"); } -//void __pthread_mutexattr_destroy ( void ) { unimp("__pthread_mutexattr_destroy"); } -//void __pthread_mutexattr_init ( void ) { unimp("__pthread_mutexattr_init"); } -//void __pthread_mutexattr_settype ( void ) { 
unimp("__pthread_mutexattr_settype"); } -//void __pthread_once ( void ) { unimp("__pthread_once"); } -//void __pthread_setspecific ( void ) { unimp("__pthread_setspecific"); } -//void __pwrite64 ( void ) { unimp("__pwrite64"); } -//void __read ( void ) { unimp("__read"); } -//void __res_state ( void ) { unimp("__res_state"); } -//void __send ( void ) { unimp("__send"); } -//void __sigaction ( void ) { unimp("__sigaction"); } -//--//void __vfork ( void ) { unimp("__vfork"); } -//void __wait ( void ) { unimp("__wait"); } -//void __write ( void ) { unimp("__write"); } -//void _pthread_cleanup_pop ( void ) { unimp("_pthread_cleanup_pop"); } -//void _pthread_cleanup_pop_restore ( void ) { unimp("_pthread_cleanup_pop_restore"); } -//void _pthread_cleanup_push ( void ) { unimp("_pthread_cleanup_push"); } -//void _pthread_cleanup_push_defer ( void ) { unimp("_pthread_cleanup_push_defer"); } -//void longjmp ( void ) { unimp("longjmp"); } -//void pthread_atfork ( void ) { unimp("pthread_atfork"); } -//void pthread_attr_destroy ( void ) { unimp("pthread_attr_destroy"); } -void pthread_attr_getdetachstate ( void ) { unimp("pthread_attr_getdetachstate"); } -void pthread_attr_getinheritsched ( void ) { unimp("pthread_attr_getinheritsched"); } -//void pthread_attr_getschedparam ( void ) { unimp("pthread_attr_getschedparam"); } -//void pthread_attr_getschedpolicy ( void ) { unimp("pthread_attr_getschedpolicy"); } -//void pthread_attr_getscope ( void ) { unimp("pthread_attr_getscope"); } - -//void pthread_attr_setdetachstate ( void ) { unimp("pthread_attr_setdetachstate"); } -//void pthread_attr_setinheritsched ( void ) { unimp("pthread_attr_setinheritsched"); } -//void pthread_attr_setschedparam ( void ) { unimp("pthread_attr_setschedparam"); } -//void pthread_attr_setschedpolicy ( void ) { unimp("pthread_attr_setschedpolicy"); } -//void pthread_attr_setscope ( void ) { unimp("pthread_attr_setscope"); } -void pthread_barrier_destroy ( void ) { unimp("pthread_barrier_destroy"); } 
-void pthread_barrier_init ( void ) { unimp("pthread_barrier_init"); } -void pthread_barrier_wait ( void ) { unimp("pthread_barrier_wait"); } -void pthread_barrierattr_destroy ( void ) { unimp("pthread_barrierattr_destroy"); } -void pthread_barrierattr_init ( void ) { unimp("pthread_barrierattr_init"); } -void pthread_barrierattr_setpshared ( void ) { unimp("pthread_barrierattr_setpshared"); } -//void pthread_cancel ( void ) { unimp("pthread_cancel"); } -//void pthread_cond_broadcast ( void ) { unimp("pthread_cond_broadcast"); } -//void pthread_cond_destroy ( void ) { unimp("pthread_cond_destroy"); } -//void pthread_cond_init ( void ) { unimp("pthread_cond_init"); } -//void pthread_cond_signal ( void ) { unimp("pthread_cond_signal"); } -//void pthread_cond_timedwait ( void ) { unimp("pthread_cond_timedwait"); } -//void pthread_cond_wait ( void ) { unimp("pthread_cond_wait"); } -//void pthread_condattr_destroy ( void ) { unimp("pthread_condattr_destroy"); } -void pthread_condattr_getpshared ( void ) { unimp("pthread_condattr_getpshared"); } -//void pthread_condattr_init ( void ) { unimp("pthread_condattr_init"); } -void pthread_condattr_setpshared ( void ) { unimp("pthread_condattr_setpshared"); } -//void pthread_detach ( void ) { unimp("pthread_detach"); } -//void pthread_equal ( void ) { unimp("pthread_equal"); } -//void pthread_exit ( void ) { unimp("pthread_exit"); } -//void pthread_getattr_np ( void ) { unimp("pthread_getattr_np"); } -void pthread_getcpuclockid ( void ) { unimp("pthread_getcpuclockid"); } -//void pthread_getschedparam ( void ) { unimp("pthread_getschedparam"); } -//void pthread_getspecific ( void ) { unimp("pthread_getspecific"); } -//void pthread_join ( void ) { unimp("pthread_join"); } -//void pthread_key_create ( void ) { unimp("pthread_key_create"); } -//void pthread_key_delete ( void ) { unimp("pthread_key_delete"); } -//void pthread_kill ( void ) { unimp("pthread_kill"); } -//void pthread_mutex_destroy ( void ) { 
unimp("pthread_mutex_destroy"); } -//void pthread_mutex_init ( void ) { unimp("pthread_mutex_init"); } -//void pthread_mutex_lock ( void ) { unimp("pthread_mutex_lock"); } -void pthread_mutex_timedlock ( void ) { unimp("pthread_mutex_timedlock"); } -//void pthread_mutex_trylock ( void ) { unimp("pthread_mutex_trylock"); } -//void pthread_mutex_unlock ( void ) { unimp("pthread_mutex_unlock"); } -//void pthread_mutexattr_destroy ( void ) { unimp("pthread_mutexattr_destroy"); } -//void pthread_mutexattr_init ( void ) { unimp("pthread_mutexattr_init"); } -//void pthread_once ( void ) { unimp("pthread_once"); } -//void pthread_rwlock_destroy ( void ) { unimp("pthread_rwlock_destroy"); } -//void pthread_rwlock_init ( void ) { unimp("pthread_rwlock_init"); } -//void pthread_rwlock_rdlock ( void ) { unimp("pthread_rwlock_rdlock"); } -void pthread_rwlock_timedrdlock ( void ) { unimp("pthread_rwlock_timedrdlock"); } -void pthread_rwlock_timedwrlock ( void ) { unimp("pthread_rwlock_timedwrlock"); } -//void pthread_rwlock_tryrdlock ( void ) { unimp("pthread_rwlock_tryrdlock"); } -//void pthread_rwlock_trywrlock ( void ) { unimp("pthread_rwlock_trywrlock"); } -//void pthread_rwlock_unlock ( void ) { unimp("pthread_rwlock_unlock"); } -//void pthread_rwlock_wrlock ( void ) { unimp("pthread_rwlock_wrlock"); } -void pthread_rwlockattr_destroy ( void ) { unimp("pthread_rwlockattr_destroy"); } -void pthread_rwlockattr_getkind_np ( void ) { unimp("pthread_rwlockattr_getkind_np"); } -void pthread_rwlockattr_getpshared ( void ) { unimp("pthread_rwlockattr_getpshared"); } -//void pthread_rwlockattr_init ( void ) { unimp("pthread_rwlockattr_init"); } -void pthread_rwlockattr_setkind_np ( void ) { unimp("pthread_rwlockattr_setkind_np"); } -//void pthread_rwlockattr_setpshared ( void ) { unimp("pthread_rwlockattr_setpshared"); } -//void pthread_self ( void ) { unimp("pthread_self"); } -//void pthread_setcancelstate ( void ) { unimp("pthread_setcancelstate"); } -//void pthread_setcanceltype 
( void ) { unimp("pthread_setcanceltype"); } -//void pthread_setschedparam ( void ) { unimp("pthread_setschedparam"); } -//void pthread_setspecific ( void ) { unimp("pthread_setspecific"); } -//void pthread_sigmask ( void ) { unimp("pthread_sigmask"); } -//void pthread_testcancel ( void ) { unimp("pthread_testcancel"); } -//void raise ( void ) { unimp("raise"); } -void sem_close ( void ) { unimp("sem_close"); } -void sem_open ( void ) { unimp("sem_open"); } -void sem_timedwait ( void ) { unimp("sem_timedwait"); } -void sem_unlink ( void ) { unimp("sem_unlink"); } -//void sigaction ( void ) { unimp("sigaction"); } -//void siglongjmp ( void ) { unimp("siglongjmp"); } -//void sigwait ( void ) { unimp("sigwait"); } - -void __pthread_clock_gettime ( void ) { unimp("__pthread_clock_gettime"); } -void __pthread_clock_settime ( void ) { unimp("__pthread_clock_settime"); } - -#if 0 -void pthread_create@@GLIBC_2.1 ( void ) { unimp("pthread_create@@GLIBC_2.1"); } -void pthread_create@GLIBC_2.0 ( void ) { unimp("pthread_create@GLIBC_2.0"); } - -void sem_wait@@GLIBC_2.1 ( void ) { unimp("sem_wait@@GLIBC_2.1"); } -void sem_wait@GLIBC_2.0 ( void ) { unimp("sem_wait@GLIBC_2.0"); } - -void sem_trywait@@GLIBC_2.1 ( void ) { unimp("sem_trywait@@GLIBC_2.1"); } -void sem_trywait@GLIBC_2.0 ( void ) { unimp("sem_trywait@GLIBC_2.0"); } - -void sem_post@@GLIBC_2.1 ( void ) { unimp("sem_post@@GLIBC_2.1"); } -void sem_post@GLIBC_2.0 ( void ) { unimp("sem_post@GLIBC_2.0"); } - -void sem_destroy@@GLIBC_2.1 ( void ) { unimp("sem_destroy@@GLIBC_2.1"); } -void sem_destroy@GLIBC_2.0 ( void ) { unimp("sem_destroy@GLIBC_2.0"); } -void sem_getvalue@@GLIBC_2.1 ( void ) { unimp("sem_getvalue@@GLIBC_2.1"); } -void sem_getvalue@GLIBC_2.0 ( void ) { unimp("sem_getvalue@GLIBC_2.0"); } -void sem_init@@GLIBC_2.1 ( void ) { unimp("sem_init@@GLIBC_2.1"); } -void sem_init@GLIBC_2.0 ( void ) { unimp("sem_init@GLIBC_2.0"); } - -void pthread_attr_init@@GLIBC_2.1 ( void ) { unimp("pthread_attr_init@@GLIBC_2.1"); } 
-void pthread_attr_init@GLIBC_2.0 ( void ) { unimp("pthread_attr_init@GLIBC_2.0"); } -#endif - - - -# define strong_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((alias (#name))); - -# define weak_alias(name, aliasname) \ - extern __typeof (name) aliasname __attribute__ ((weak, alias (#name))); - -//weak_alias(pthread_rwlock_destroy, __pthread_rwlock_destroy) -//weak_alias(pthread_rwlock_init, __pthread_rwlock_init) -//weak_alias(pthread_rwlock_tryrdlock, __pthread_rwlock_tryrdlock) -//weak_alias(pthread_rwlock_trywrlock, __pthread_rwlock_trywrlock) -//weak_alias(pthread_rwlock_wrlock, __pthread_rwlock_wrlock) -weak_alias(_IO_ftrylockfile, ftrylockfile) - -//__attribute__((weak)) void pread ( void ) { vgPlain_unimp("pread"); } -//__attribute__((weak)) void pwrite ( void ) { vgPlain_unimp("pwrite"); } -//__attribute__((weak)) void msync ( void ) { vgPlain_unimp("msync"); } -//__attribute__((weak)) void pause ( void ) { vgPlain_unimp("pause"); } -//__attribute__((weak)) void recvfrom ( void ) { vgPlain_unimp("recvfrom"); } -//__attribute__((weak)) void recvmsg ( void ) { vgPlain_unimp("recvmsg"); } -//__attribute__((weak)) void sendmsg ( void ) { vgPlain_unimp("sendmsg"); } -__attribute__((weak)) void tcdrain ( void ) { vgPlain_unimp("tcdrain"); } -//--//__attribute__((weak)) void vfork ( void ) { vgPlain_unimp("vfork"); } - -__attribute__((weak)) void pthread_attr_getguardsize ( void ) - { vgPlain_unimp("pthread_attr_getguardsize"); } -__attribute__((weak)) void pthread_attr_getstack ( void ) - { vgPlain_unimp("pthread_attr_getstack"); } -__attribute__((weak)) void pthread_attr_getstackaddr ( void ) - { vgPlain_unimp("pthread_attr_getstackaddr"); } -__attribute__((weak)) void pthread_attr_getstacksize ( void ) - { vgPlain_unimp("pthread_attr_getstacksize"); } -__attribute__((weak)) void pthread_attr_setguardsize ( void ) - { vgPlain_unimp("pthread_attr_setguardsize"); } -__attribute__((weak)) void pthread_attr_setstack ( void ) - { 
vgPlain_unimp("pthread_attr_setstack"); } -__attribute__((weak)) void pthread_attr_setstackaddr ( void ) - { vgPlain_unimp("pthread_attr_setstackaddr"); } -//__attribute__((weak)) void pthread_attr_setstacksize ( void ) -// { vgPlain_unimp("pthread_attr_setstacksize"); } -__attribute__((weak)) void pthread_getconcurrency ( void ) - { vgPlain_unimp("pthread_getconcurrency"); } -//__attribute__((weak)) void pthread_kill_other_threads_np ( void ) -// { vgPlain_unimp("pthread_kill_other_threads_np"); } -__attribute__((weak)) void pthread_mutexattr_getkind_np ( void ) - { vgPlain_unimp("pthread_mutexattr_getkind_np"); } -__attribute__((weak)) void pthread_mutexattr_getpshared ( void ) - { vgPlain_unimp("pthread_mutexattr_getpshared"); } -__attribute__((weak)) void pthread_mutexattr_gettype ( void ) - { vgPlain_unimp("pthread_mutexattr_gettype"); } -__attribute__((weak)) void pthread_mutexattr_setkind_np ( void ) - { vgPlain_unimp("pthread_mutexattr_setkind_np"); } -__attribute__((weak)) void pthread_mutexattr_setpshared ( void ) - { vgPlain_unimp("pthread_mutexattr_setpshared"); } -__attribute__((weak)) void pthread_setconcurrency ( void ) - { vgPlain_unimp("pthread_setconcurrency"); } -__attribute__((weak)) void pthread_spin_destroy ( void ) - { vgPlain_unimp("pthread_spin_destroy"); } -__attribute__((weak)) void pthread_spin_init ( void ) - { vgPlain_unimp("pthread_spin_init"); } -__attribute__((weak)) void pthread_spin_lock ( void ) - { vgPlain_unimp("pthread_spin_lock"); } -__attribute__((weak)) void pthread_spin_trylock ( void ) - { vgPlain_unimp("pthread_spin_trylock"); } -__attribute__((weak)) void pthread_spin_unlock ( void ) - { vgPlain_unimp("pthread_spin_unlock"); } - - -/*--------------------------------------------------------------------*/ -/*--- end vg_libpthread_unimp.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_main.c b/coregrind/vg_main.c deleted file mode 100644 index 
5cce13d2e8..0000000000 --- a/coregrind/vg_main.c +++ /dev/null @@ -1,1411 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- C startup stuff, reached from vg_startup.S. ---*/ -/*--- vg_main.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" - - -/* --------------------------------------------------------------------- - Compute offsets into baseBlock. See comments in vg_include.h. - ------------------------------------------------------------------ */ - -/* The variables storing offsets. 
*/ - -#define INVALID_OFFSET (-1) - -Int VGOFF_(m_eax) = INVALID_OFFSET; -Int VGOFF_(m_ecx) = INVALID_OFFSET; -Int VGOFF_(m_edx) = INVALID_OFFSET; -Int VGOFF_(m_ebx) = INVALID_OFFSET; -Int VGOFF_(m_esp) = INVALID_OFFSET; -Int VGOFF_(m_ebp) = INVALID_OFFSET; -Int VGOFF_(m_esi) = INVALID_OFFSET; -Int VGOFF_(m_edi) = INVALID_OFFSET; -Int VGOFF_(m_eflags) = INVALID_OFFSET; -Int VGOFF_(m_fpustate) = INVALID_OFFSET; -Int VGOFF_(m_eip) = INVALID_OFFSET; -Int VGOFF_(spillslots) = INVALID_OFFSET; -Int VGOFF_(sh_eax) = INVALID_OFFSET; -Int VGOFF_(sh_ecx) = INVALID_OFFSET; -Int VGOFF_(sh_edx) = INVALID_OFFSET; -Int VGOFF_(sh_ebx) = INVALID_OFFSET; -Int VGOFF_(sh_esp) = INVALID_OFFSET; -Int VGOFF_(sh_ebp) = INVALID_OFFSET; -Int VGOFF_(sh_esi) = INVALID_OFFSET; -Int VGOFF_(sh_edi) = INVALID_OFFSET; -Int VGOFF_(sh_eflags) = INVALID_OFFSET; -Int VGOFF_(helper_idiv_64_32) = INVALID_OFFSET; -Int VGOFF_(helper_div_64_32) = INVALID_OFFSET; -Int VGOFF_(helper_idiv_32_16) = INVALID_OFFSET; -Int VGOFF_(helper_div_32_16) = INVALID_OFFSET; -Int VGOFF_(helper_idiv_16_8) = INVALID_OFFSET; -Int VGOFF_(helper_div_16_8) = INVALID_OFFSET; -Int VGOFF_(helper_imul_32_64) = INVALID_OFFSET; -Int VGOFF_(helper_mul_32_64) = INVALID_OFFSET; -Int VGOFF_(helper_imul_16_32) = INVALID_OFFSET; -Int VGOFF_(helper_mul_16_32) = INVALID_OFFSET; -Int VGOFF_(helper_imul_8_16) = INVALID_OFFSET; -Int VGOFF_(helper_mul_8_16) = INVALID_OFFSET; -Int VGOFF_(helper_CLD) = INVALID_OFFSET; -Int VGOFF_(helper_STD) = INVALID_OFFSET; -Int VGOFF_(helper_get_dirflag) = INVALID_OFFSET; -Int VGOFF_(helper_CLC) = INVALID_OFFSET; -Int VGOFF_(helper_STC) = INVALID_OFFSET; -Int VGOFF_(helper_shldl) = INVALID_OFFSET; -Int VGOFF_(helper_shldw) = INVALID_OFFSET; -Int VGOFF_(helper_shrdl) = INVALID_OFFSET; -Int VGOFF_(helper_shrdw) = INVALID_OFFSET; -Int VGOFF_(helper_RDTSC) = INVALID_OFFSET; -Int VGOFF_(helper_CPUID) = INVALID_OFFSET; -Int VGOFF_(helper_BSWAP) = INVALID_OFFSET; -Int VGOFF_(helper_bsf) = INVALID_OFFSET; -Int 
VGOFF_(helper_bsr) = INVALID_OFFSET; -Int VGOFF_(helper_fstsw_AX) = INVALID_OFFSET; -Int VGOFF_(helper_SAHF) = INVALID_OFFSET; -Int VGOFF_(helper_DAS) = INVALID_OFFSET; -Int VGOFF_(helper_DAA) = INVALID_OFFSET; -Int VGOFF_(helper_value_check4_fail) = INVALID_OFFSET; -Int VGOFF_(helper_value_check2_fail) = INVALID_OFFSET; -Int VGOFF_(helper_value_check1_fail) = INVALID_OFFSET; -Int VGOFF_(helper_value_check0_fail) = INVALID_OFFSET; -Int VGOFF_(helperc_LOADV4) = INVALID_OFFSET; -Int VGOFF_(helperc_LOADV2) = INVALID_OFFSET; -Int VGOFF_(helperc_LOADV1) = INVALID_OFFSET; -Int VGOFF_(helperc_STOREV4) = INVALID_OFFSET; -Int VGOFF_(helperc_STOREV2) = INVALID_OFFSET; -Int VGOFF_(helperc_STOREV1) = INVALID_OFFSET; -Int VGOFF_(handle_esp_assignment) = INVALID_OFFSET; -Int VGOFF_(fpu_write_check) = INVALID_OFFSET; -Int VGOFF_(fpu_read_check) = INVALID_OFFSET; -Int VGOFF_(cachesim_log_non_mem_instr) = INVALID_OFFSET; -Int VGOFF_(cachesim_log_mem_instr) = INVALID_OFFSET; - -/* This is the actual defn of baseblock. */ -UInt VG_(baseBlock)[VG_BASEBLOCK_WORDS]; - -/* Words. */ -static Int baB_off = 0; - -/* Returns the offset, in words. */ -static Int alloc_BaB ( Int words ) -{ - Int off = baB_off; - baB_off += words; - if (baB_off >= VG_BASEBLOCK_WORDS) - VG_(panic)( "alloc_BaB: baseBlock is too small"); - - return off; -} - -/* Allocate 1 word in baseBlock and set it to the given value. */ -static Int alloc_BaB_1_set ( Addr a ) -{ - Int off = alloc_BaB(1); - VG_(baseBlock)[off] = (UInt)a; - return off; -} - - -/* Here we assign actual offsets. It's important to get the most - popular referents within 128 bytes of the start, so we can take - advantage of short addressing modes relative to %ebp. Popularity - of offsets was measured on 22 Feb 02 running a KDE application, and - the slots rearranged accordingly, with a 1.5% reduction in total - size of translations. 
*/ - -static void vg_init_baseBlock ( void ) -{ - baB_off = 0; - - /* Those with offsets under 128 are carefully chosen. */ - - /* WORD offsets in this column */ - /* 0 */ VGOFF_(m_eax) = alloc_BaB(1); - /* 1 */ VGOFF_(m_ecx) = alloc_BaB(1); - /* 2 */ VGOFF_(m_edx) = alloc_BaB(1); - /* 3 */ VGOFF_(m_ebx) = alloc_BaB(1); - /* 4 */ VGOFF_(m_esp) = alloc_BaB(1); - /* 5 */ VGOFF_(m_ebp) = alloc_BaB(1); - /* 6 */ VGOFF_(m_esi) = alloc_BaB(1); - /* 7 */ VGOFF_(m_edi) = alloc_BaB(1); - /* 8 */ VGOFF_(m_eflags) = alloc_BaB(1); - - /* 9 */ VGOFF_(sh_eax) = alloc_BaB(1); - /* 10 */ VGOFF_(sh_ecx) = alloc_BaB(1); - /* 11 */ VGOFF_(sh_edx) = alloc_BaB(1); - /* 12 */ VGOFF_(sh_ebx) = alloc_BaB(1); - /* 13 */ VGOFF_(sh_esp) = alloc_BaB(1); - /* 14 */ VGOFF_(sh_ebp) = alloc_BaB(1); - /* 15 */ VGOFF_(sh_esi) = alloc_BaB(1); - /* 16 */ VGOFF_(sh_edi) = alloc_BaB(1); - /* 17 */ VGOFF_(sh_eflags) = alloc_BaB(1); - - /* 17a */ - VGOFF_(cachesim_log_non_mem_instr) - = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_non_mem_instr) ); - /* 17b */ - VGOFF_(cachesim_log_mem_instr) - = alloc_BaB_1_set( (Addr) & VG_(cachesim_log_mem_instr) ); - - /* 18 */ - VGOFF_(helper_value_check4_fail) - = alloc_BaB_1_set( (Addr) & VG_(helper_value_check4_fail) ); - /* 19 */ - VGOFF_(helper_value_check0_fail) - = alloc_BaB_1_set( (Addr) & VG_(helper_value_check0_fail) ); - - /* 20 */ - VGOFF_(helperc_STOREV4) - = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV4) ); - /* 21 */ - VGOFF_(helperc_STOREV1) - = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV1) ); - - /* 22 */ - VGOFF_(helperc_LOADV4) - = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV4) ); - /* 23 */ - VGOFF_(helperc_LOADV1) - = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV1) ); - - /* 24 */ - VGOFF_(handle_esp_assignment) - = alloc_BaB_1_set( (Addr) & VGM_(handle_esp_assignment) ); - - /* 25 */ - VGOFF_(m_eip) = alloc_BaB(1); - - /* There are currently 24 spill slots */ - /* 26 .. 
49 This overlaps the magic boundary at >= 32 words, but - most spills are to low numbered spill slots, so the ones above - the boundary don't see much action. */ - VGOFF_(spillslots) = alloc_BaB(VG_MAX_SPILLSLOTS); - - /* These two pushed beyond the boundary because 2-byte transactions - are rare. */ - /* 50 */ - VGOFF_(helperc_STOREV2) - = alloc_BaB_1_set( (Addr) & VG_(helperc_STOREV2) ); - /* 51 */ - VGOFF_(helperc_LOADV2) - = alloc_BaB_1_set( (Addr) & VG_(helperc_LOADV2) ); - - /* 52 */ - VGOFF_(fpu_write_check) - = alloc_BaB_1_set( (Addr) & VGM_(fpu_write_check) ); - /* 53 */ - VGOFF_(fpu_read_check) - = alloc_BaB_1_set( (Addr) & VGM_(fpu_read_check) ); - - /* Actually I don't think these two are ever used. */ - /* 54 */ - VGOFF_(helper_value_check2_fail) - = alloc_BaB_1_set( (Addr) & VG_(helper_value_check2_fail) ); - /* 55 */ - VGOFF_(helper_value_check1_fail) - = alloc_BaB_1_set( (Addr) & VG_(helper_value_check1_fail) ); - - /* I gave up counting at this point. Since they're way above the - short-amode-boundary, there's no point. 
*/ - - VGOFF_(m_fpustate) = alloc_BaB(VG_SIZE_OF_FPUSTATE_W); - - VGOFF_(helper_idiv_64_32) - = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_64_32) ); - VGOFF_(helper_div_64_32) - = alloc_BaB_1_set( (Addr) & VG_(helper_div_64_32) ); - VGOFF_(helper_idiv_32_16) - = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_32_16) ); - VGOFF_(helper_div_32_16) - = alloc_BaB_1_set( (Addr) & VG_(helper_div_32_16) ); - VGOFF_(helper_idiv_16_8) - = alloc_BaB_1_set( (Addr) & VG_(helper_idiv_16_8) ); - VGOFF_(helper_div_16_8) - = alloc_BaB_1_set( (Addr) & VG_(helper_div_16_8) ); - - VGOFF_(helper_imul_32_64) - = alloc_BaB_1_set( (Addr) & VG_(helper_imul_32_64) ); - VGOFF_(helper_mul_32_64) - = alloc_BaB_1_set( (Addr) & VG_(helper_mul_32_64) ); - VGOFF_(helper_imul_16_32) - = alloc_BaB_1_set( (Addr) & VG_(helper_imul_16_32) ); - VGOFF_(helper_mul_16_32) - = alloc_BaB_1_set( (Addr) & VG_(helper_mul_16_32) ); - VGOFF_(helper_imul_8_16) - = alloc_BaB_1_set( (Addr) & VG_(helper_imul_8_16) ); - VGOFF_(helper_mul_8_16) - = alloc_BaB_1_set( (Addr) & VG_(helper_mul_8_16) ); - - VGOFF_(helper_CLD) - = alloc_BaB_1_set( (Addr) & VG_(helper_CLD) ); - VGOFF_(helper_STD) - = alloc_BaB_1_set( (Addr) & VG_(helper_STD) ); - VGOFF_(helper_get_dirflag) - = alloc_BaB_1_set( (Addr) & VG_(helper_get_dirflag) ); - - VGOFF_(helper_CLC) - = alloc_BaB_1_set( (Addr) & VG_(helper_CLC) ); - VGOFF_(helper_STC) - = alloc_BaB_1_set( (Addr) & VG_(helper_STC) ); - - VGOFF_(helper_shldl) - = alloc_BaB_1_set( (Addr) & VG_(helper_shldl) ); - VGOFF_(helper_shldw) - = alloc_BaB_1_set( (Addr) & VG_(helper_shldw) ); - VGOFF_(helper_shrdl) - = alloc_BaB_1_set( (Addr) & VG_(helper_shrdl) ); - VGOFF_(helper_shrdw) - = alloc_BaB_1_set( (Addr) & VG_(helper_shrdw) ); - - VGOFF_(helper_RDTSC) - = alloc_BaB_1_set( (Addr) & VG_(helper_RDTSC) ); - VGOFF_(helper_CPUID) - = alloc_BaB_1_set( (Addr) & VG_(helper_CPUID) ); - - VGOFF_(helper_bsf) - = alloc_BaB_1_set( (Addr) & VG_(helper_bsf) ); - VGOFF_(helper_bsr) - = alloc_BaB_1_set( (Addr) & 
VG_(helper_bsr) ); - - VGOFF_(helper_fstsw_AX) - = alloc_BaB_1_set( (Addr) & VG_(helper_fstsw_AX) ); - VGOFF_(helper_SAHF) - = alloc_BaB_1_set( (Addr) & VG_(helper_SAHF) ); - VGOFF_(helper_DAS) - = alloc_BaB_1_set( (Addr) & VG_(helper_DAS) ); - VGOFF_(helper_DAA) - = alloc_BaB_1_set( (Addr) & VG_(helper_DAA) ); -} - - -/* --------------------------------------------------------------------- - Global entities which are not referenced from generated code. - ------------------------------------------------------------------ */ - -/* The stack on which Valgrind runs. We can't use the same stack as - the simulatee -- that's an important design decision. */ -UInt VG_(stack)[10000]; - -/* Ditto our signal delivery stack. */ -UInt VG_(sigstack)[10000]; - -/* Saving stuff across system calls. */ -UInt VG_(real_fpu_state_saved_over_syscall)[VG_SIZE_OF_FPUSTATE_W]; -Addr VG_(esp_saved_over_syscall); - -/* Counts downwards in vg_run_innerloop. */ -UInt VG_(dispatch_ctr); - - -/* 64-bit counter for the number of basic blocks done. */ -ULong VG_(bbs_done); -/* 64-bit counter for the number of bbs to go before a debug exit. */ -ULong VG_(bbs_to_go); - -/* Produce debugging output? */ -Bool VG_(disassemble) = False; - -/* The current LRU epoch. */ -UInt VG_(current_epoch) = 0; - -/* This is the ThreadId of the last thread the scheduler ran. */ -ThreadId VG_(last_run_tid) = 0; - - -/* --------------------------------------------------------------------- - Counters, for informational purposes only. - ------------------------------------------------------------------ */ - -/* Number of lookups which miss the fast tt helper. */ -UInt VG_(tt_fast_misses) = 0; - - -/* Counts for LRU informational messages. */ - -/* Number and total o/t size of new translations this epoch. */ -UInt VG_(this_epoch_in_count) = 0; -UInt VG_(this_epoch_in_osize) = 0; -UInt VG_(this_epoch_in_tsize) = 0; -/* Number and total o/t size of discarded translations this epoch. 
*/ -UInt VG_(this_epoch_out_count) = 0; -UInt VG_(this_epoch_out_osize) = 0; -UInt VG_(this_epoch_out_tsize) = 0; -/* Number and total o/t size of translations overall. */ -UInt VG_(overall_in_count) = 0; -UInt VG_(overall_in_osize) = 0; -UInt VG_(overall_in_tsize) = 0; -/* Number and total o/t size of discards overall. */ -UInt VG_(overall_out_count) = 0; -UInt VG_(overall_out_osize) = 0; -UInt VG_(overall_out_tsize) = 0; - -/* The number of LRU-clearings of TT/TC. */ -UInt VG_(number_of_lrus) = 0; - - -/* Counts pertaining to the register allocator. */ - -/* total number of uinstrs input to reg-alloc */ -UInt VG_(uinstrs_prealloc) = 0; - -/* total number of uinstrs added due to spill code */ -UInt VG_(uinstrs_spill) = 0; - -/* number of bbs requiring spill code */ -UInt VG_(translations_needing_spill) = 0; - -/* total of register ranks over all translations */ -UInt VG_(total_reg_rank) = 0; - - -/* Counts pertaining to internal sanity checking. */ -UInt VG_(sanity_fast_count) = 0; -UInt VG_(sanity_slow_count) = 0; - -/* Counts pertaining to the scheduler. */ -UInt VG_(num_scheduling_events_MINOR) = 0; -UInt VG_(num_scheduling_events_MAJOR) = 0; - - -/* --------------------------------------------------------------------- - Values derived from command-line options. 
- ------------------------------------------------------------------ */ - -Bool VG_(clo_error_limit); -Bool VG_(clo_check_addrVs); -Bool VG_(clo_GDB_attach); -Int VG_(sanity_level); -Int VG_(clo_verbosity); -Bool VG_(clo_demangle); -Bool VG_(clo_leak_check); -Bool VG_(clo_show_reachable); -Int VG_(clo_leak_resolution); -Bool VG_(clo_sloppy_malloc); -Int VG_(clo_alignment); -Bool VG_(clo_partial_loads_ok); -Bool VG_(clo_trace_children); -Int VG_(clo_logfile_fd); -Int VG_(clo_freelist_vol); -Bool VG_(clo_workaround_gcc296_bugs); -Int VG_(clo_n_suppressions); -Char* VG_(clo_suppressions)[VG_CLO_MAX_SFILES]; -Bool VG_(clo_single_step); -Bool VG_(clo_optimise); -Bool VG_(clo_instrument); -Bool VG_(clo_cleanup); -Bool VG_(clo_cachesim); -cache_t VG_(clo_I1_cache); -cache_t VG_(clo_D1_cache); -cache_t VG_(clo_L2_cache); -Int VG_(clo_smc_check); -Bool VG_(clo_trace_syscalls); -Bool VG_(clo_trace_signals); -Bool VG_(clo_trace_symtab); -Bool VG_(clo_trace_malloc); -Bool VG_(clo_trace_sched); -Int VG_(clo_trace_pthread_level); -ULong VG_(clo_stop_after); -Int VG_(clo_dump_error); -Int VG_(clo_backtrace_size); -Char* VG_(clo_weird_hacks); - -/* This Bool is needed by wrappers in vg_clientmalloc.c to decide how - to behave. Initially we say False. */ -Bool VG_(running_on_simd_CPU) = False; - -/* Holds client's %esp at the point we gained control. */ -Addr VG_(esp_at_startup); - -/* As deduced from VG_(esp_at_startup), the client's argc, argv[] and - envp[] as extracted from the client's stack at startup-time. */ -Int VG_(client_argc); -Char** VG_(client_argv); -Char** VG_(client_envp); - -/* A place into which to copy the value of env var VG_ARGS, so we - don't have to modify the original. */ -static Char vg_cmdline_copy[M_VG_CMDLINE_STRLEN]; - - -/* --------------------------------------------------------------------- - Processing of command-line options. 
- ------------------------------------------------------------------ */ - -static void bad_option ( Char* opt ) -{ - VG_(shutdown_logging)(); - VG_(clo_logfile_fd) = 2; /* stderr */ - VG_(printf)("valgrind.so: Bad option `%s'; aborting.\n", opt); - VG_(exit)(1); -} - -static void config_error ( Char* msg ) -{ - VG_(shutdown_logging)(); - VG_(clo_logfile_fd) = 2; /* stderr */ - VG_(printf)( - "valgrind.so: Startup or configuration error:\n %s\n", msg); - VG_(printf)( - "valgrind.so: Unable to start up properly. Giving up.\n"); - VG_(exit)(1); -} - -static void args_grok_error ( Char* msg ) -{ - VG_(shutdown_logging)(); - VG_(clo_logfile_fd) = 2; /* stderr */ - VG_(printf)("valgrind.so: When searching for " - "client's argc/argc/envp:\n\t%s\n", msg); - config_error("couldn't find client's argc/argc/envp"); -} - -static void parse_cache_opt ( cache_t* cache, char* orig_opt, int opt_len ) -{ - int i1, i2, i3; - int i; - char *opt = VG_(strdup)(VG_AR_PRIVATE, orig_opt); - - i = i1 = opt_len; - - /* Option looks like "--I1=65536,2,64". - * Find commas, replace with NULs to make three independent - * strings, then extract numbers. Yuck. 
*/ - while (VG_(isdigit)(opt[i])) i++; - if (',' == opt[i]) { - opt[i++] = '\0'; - i2 = i; - } else goto bad; - while (VG_(isdigit)(opt[i])) i++; - if (',' == opt[i]) { - opt[i++] = '\0'; - i3 = i; - } else goto bad; - while (VG_(isdigit)(opt[i])) i++; - if ('\0' != opt[i]) goto bad; - - cache->size = (Int)VG_(atoll)(opt + i1); - cache->assoc = (Int)VG_(atoll)(opt + i2); - cache->line_size = (Int)VG_(atoll)(opt + i3); - - VG_(free)(VG_AR_PRIVATE, opt); - return; - - bad: - bad_option(orig_opt); -} - -static void process_cmd_line_options ( void ) -{ - UChar* argv[M_VG_CMDLINE_OPTS]; - UInt argc; - UChar* p; - UChar* str; - Int i, eventually_logfile_fd, ctr; - -# define ISSPACE(cc) ((cc) == ' ' || (cc) == '\t' || (cc) == '\n') -# define STREQ(s1,s2) (0==VG_(strcmp_ws)((s1),(s2))) -# define STREQN(nn,s1,s2) (0==VG_(strncmp_ws)((s1),(s2),(nn))) - - /* Set defaults. */ - VG_(clo_error_limit) = True; - VG_(clo_check_addrVs) = True; - VG_(clo_GDB_attach) = False; - VG_(sanity_level) = 1; - VG_(clo_verbosity) = 1; - VG_(clo_demangle) = True; - VG_(clo_leak_check) = False; - VG_(clo_show_reachable) = False; - VG_(clo_leak_resolution) = 2; - VG_(clo_sloppy_malloc) = False; - VG_(clo_alignment) = 4; - VG_(clo_partial_loads_ok) = True; - VG_(clo_trace_children) = False; - VG_(clo_logfile_fd) = 2; /* stderr */ - VG_(clo_freelist_vol) = 1000000; - VG_(clo_workaround_gcc296_bugs) = False; - VG_(clo_n_suppressions) = 0; - VG_(clo_single_step) = False; - VG_(clo_optimise) = True; - VG_(clo_instrument) = True; - VG_(clo_cachesim) = False; - VG_(clo_I1_cache) = UNDEFINED_CACHE; - VG_(clo_D1_cache) = UNDEFINED_CACHE; - VG_(clo_L2_cache) = UNDEFINED_CACHE; - VG_(clo_cleanup) = True; - VG_(clo_smc_check) = /* VG_CLO_SMC_SOME */ VG_CLO_SMC_NONE; - VG_(clo_trace_syscalls) = False; - VG_(clo_trace_signals) = False; - VG_(clo_trace_symtab) = False; - VG_(clo_trace_malloc) = False; - VG_(clo_trace_sched) = False; - VG_(clo_trace_pthread_level) = 0; - VG_(clo_stop_after) = 1000000000000LL; - 
VG_(clo_dump_error) = 0; - VG_(clo_backtrace_size) = 4; - VG_(clo_weird_hacks) = NULL; - - eventually_logfile_fd = VG_(clo_logfile_fd); - - /* Once logging is started, we can safely send messages pertaining - to failures in initialisation. */ - VG_(startup_logging)(); - - /* Check for sane path in ./configure --prefix=... */ - if (VG_(strlen)(VG_LIBDIR) < 1 - || VG_LIBDIR[0] != '/') - config_error("Please use absolute paths in " - "./configure --prefix=... or --libdir=..."); - - /* (Suggested by Fabrice Bellard ... ) - We look for the Linux ELF table and go down until we find the - envc & envp. It is not fool-proof, but these structures should - change less often than the libc ones. */ - { - UInt* sp = 0; /* bogus init to keep gcc -O happy */ - - /* locate the top of the stack */ - if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), - VG_STARTUP_STACK_BASE_1 )) { - sp = (UInt*)VG_STARTUP_STACK_BASE_1; - } else - if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), - VG_STARTUP_STACK_BASE_2 )) { - sp = (UInt*)VG_STARTUP_STACK_BASE_2; - } else - if (VG_STACK_MATCHES_BASE( VG_(esp_at_startup), - VG_STARTUP_STACK_BASE_3 )) { - sp = (UInt*)VG_STARTUP_STACK_BASE_3; - - } else { - args_grok_error( - "startup %esp is not near any VG_STARTUP_STACK_BASE_*\n " - "constants defined in vg_include.h. You should investigate." 
- ); - } - - /* we locate: NEW_AUX_ENT(1, AT_PAGESZ, ELF_EXEC_PAGESIZE) in - the elf interpreter table */ - sp -= 2; - while (sp[0] != VKI_AT_PAGESZ || sp[1] != 4096) { - /* VG_(printf)("trying %p\n", sp); */ - sp--; - } - - if (sp[2] == VKI_AT_BASE - && sp[0] == VKI_AT_PAGESZ - && sp[-2] == VKI_AT_PHNUM - && sp[-4] == VKI_AT_PHENT - && sp[-6] == VKI_AT_PHDR - && sp[-6-1] == 0) { - if (0) - VG_(printf)("Looks like you've got a 2.2.X kernel here.\n"); - sp -= 6; - } else - if (sp[2] == VKI_AT_CLKTCK - && sp[0] == VKI_AT_PAGESZ - && sp[-2] == VKI_AT_HWCAP - && sp[-2-1] == 0) { - if (0) - VG_(printf)("Looks like you've got a 2.4.X kernel here.\n"); - sp -= 2; - } else - if (sp[2] == VKI_AT_CLKTCK - && sp[0] == VKI_AT_PAGESZ - && sp[-2] == VKI_AT_HWCAP - && sp[-4] == VKI_AT_USER_AUX_SEGMENT - && sp[-4-1] == 0) { - if (0) - VG_(printf)("Looks like you've got a R-H Limbo 2.4.X " - "kernel here.\n"); - sp -= 4; - } else - if (sp[2] == VKI_AT_CLKTCK - && sp[0] == VKI_AT_PAGESZ - && sp[-2] == VKI_AT_HWCAP - && sp[-2-20-1] == 0) { - if (0) - VG_(printf)("Looks like you've got a early 2.4.X kernel here.\n"); - sp -= 22; - } else - args_grok_error( - "ELF frame does not look like 2.2.X or 2.4.X.\n " - "See kernel sources linux/fs/binfmt_elf.c to make sense of this." 
- ); - - sp--; - if (*sp != 0) - args_grok_error("can't find NULL at end of env[]"); - - /* sp now points to NULL at the end of env[] */ - ctr = 0; - while (True) { - sp --; - if (*sp == 0) break; - if (++ctr >= 1000) - args_grok_error( - "suspiciously many (1000) env[] entries; giving up"); - - } - /* sp now points to NULL at the end of argv[] */ - VG_(client_envp) = (Char**)(sp+1); - - ctr = 0; - VG_(client_argc) = 0; - while (True) { - sp--; - if (*sp == VG_(client_argc)) - break; - VG_(client_argc)++; - if (++ctr >= 1000) - args_grok_error( - "suspiciously many (1000) argv[] entries; giving up"); - } - - VG_(client_argv) = (Char**)(sp+1); - } - - /* Now that VG_(client_envp) has been set, we can extract the args - for Valgrind itself. Copy into global var so that we don't have to - write zeroes to the getenv'd value itself. */ - str = VG_(getenv)("VG_ARGS"); - argc = 0; - - if (!str) { - config_error("Can't read options from env var VG_ARGS."); - } - - if (VG_(strlen)(str) >= M_VG_CMDLINE_STRLEN-1) { - config_error("Command line length exceeds M_CMDLINE_STRLEN."); - } - VG_(strcpy)(vg_cmdline_copy, str); - str = NULL; - - p = &vg_cmdline_copy[0]; - while (True) { - while (ISSPACE(*p)) { *p = 0; p++; } - if (*p == 0) break; - if (argc < M_VG_CMDLINE_OPTS-1) { - argv[argc] = p; argc++; - } else { - config_error( - "Found more than M_CMDLINE_OPTS command-line opts."); - } - while (*p != 0 && !ISSPACE(*p)) p++; - } - - for (i = 0; i < argc; i++) { - - if (STREQ(argv[i], "-v") || STREQ(argv[i], "--verbose")) - VG_(clo_verbosity)++; - else if (STREQ(argv[i], "-q") || STREQ(argv[i], "--quiet")) - VG_(clo_verbosity)--; - - else if (STREQ(argv[i], "--error-limit=yes")) - VG_(clo_error_limit) = True; - else if (STREQ(argv[i], "--error-limit=no")) - VG_(clo_error_limit) = False; - - else if (STREQ(argv[i], "--check-addrVs=yes")) - VG_(clo_check_addrVs) = True; - else if (STREQ(argv[i], "--check-addrVs=no")) - VG_(clo_check_addrVs) = False; - - else if (STREQ(argv[i], 
"--gdb-attach=yes")) - VG_(clo_GDB_attach) = True; - else if (STREQ(argv[i], "--gdb-attach=no")) - VG_(clo_GDB_attach) = False; - - else if (STREQ(argv[i], "--demangle=yes")) - VG_(clo_demangle) = True; - else if (STREQ(argv[i], "--demangle=no")) - VG_(clo_demangle) = False; - - else if (STREQ(argv[i], "--partial-loads-ok=yes")) - VG_(clo_partial_loads_ok) = True; - else if (STREQ(argv[i], "--partial-loads-ok=no")) - VG_(clo_partial_loads_ok) = False; - - else if (STREQ(argv[i], "--leak-check=yes")) - VG_(clo_leak_check) = True; - else if (STREQ(argv[i], "--leak-check=no")) - VG_(clo_leak_check) = False; - - else if (STREQ(argv[i], "--show-reachable=yes")) - VG_(clo_show_reachable) = True; - else if (STREQ(argv[i], "--show-reachable=no")) - VG_(clo_show_reachable) = False; - - else if (STREQ(argv[i], "--leak-resolution=low")) - VG_(clo_leak_resolution) = 2; - else if (STREQ(argv[i], "--leak-resolution=med")) - VG_(clo_leak_resolution) = 4; - else if (STREQ(argv[i], "--leak-resolution=high")) - VG_(clo_leak_resolution) = VG_DEEPEST_BACKTRACE; - - else if (STREQ(argv[i], "--sloppy-malloc=yes")) - VG_(clo_sloppy_malloc) = True; - else if (STREQ(argv[i], "--sloppy-malloc=no")) - VG_(clo_sloppy_malloc) = False; - - else if (STREQN(12, argv[i], "--alignment=")) - VG_(clo_alignment) = (Int)VG_(atoll)(&argv[i][12]); - - else if (STREQ(argv[i], "--trace-children=yes")) - VG_(clo_trace_children) = True; - else if (STREQ(argv[i], "--trace-children=no")) - VG_(clo_trace_children) = False; - - else if (STREQ(argv[i], "--workaround-gcc296-bugs=yes")) - VG_(clo_workaround_gcc296_bugs) = True; - else if (STREQ(argv[i], "--workaround-gcc296-bugs=no")) - VG_(clo_workaround_gcc296_bugs) = False; - - else if (STREQN(15, argv[i], "--sanity-level=")) - VG_(sanity_level) = (Int)VG_(atoll)(&argv[i][15]); - - else if (STREQN(13, argv[i], "--logfile-fd=")) - eventually_logfile_fd = (Int)VG_(atoll)(&argv[i][13]); - - else if (STREQN(15, argv[i], "--freelist-vol=")) { - VG_(clo_freelist_vol) 
= (Int)VG_(atoll)(&argv[i][15]); - if (VG_(clo_freelist_vol) < 0) VG_(clo_freelist_vol) = 2; - } - - else if (STREQN(15, argv[i], "--suppressions=")) { - if (VG_(clo_n_suppressions) >= VG_CLO_MAX_SFILES) { - VG_(message)(Vg_UserMsg, "Too many logfiles specified."); - VG_(message)(Vg_UserMsg, - "Increase VG_CLO_MAX_SFILES and recompile."); - bad_option(argv[i]); - } - VG_(clo_suppressions)[VG_(clo_n_suppressions)] = &argv[i][15]; - VG_(clo_n_suppressions)++; - } - else if (STREQ(argv[i], "--single-step=yes")) - VG_(clo_single_step) = True; - else if (STREQ(argv[i], "--single-step=no")) - VG_(clo_single_step) = False; - - else if (STREQ(argv[i], "--optimise=yes")) - VG_(clo_optimise) = True; - else if (STREQ(argv[i], "--optimise=no")) - VG_(clo_optimise) = False; - - else if (STREQ(argv[i], "--instrument=yes")) - VG_(clo_instrument) = True; - else if (STREQ(argv[i], "--instrument=no")) - VG_(clo_instrument) = False; - - else if (STREQ(argv[i], "--cleanup=yes")) - VG_(clo_cleanup) = True; - else if (STREQ(argv[i], "--cleanup=no")) - VG_(clo_cleanup) = False; - - else if (STREQ(argv[i], "--cachesim=yes")) - VG_(clo_cachesim) = True; - else if (STREQ(argv[i], "--cachesim=no")) - VG_(clo_cachesim) = False; - - /* 5 is length of "--I1=" */ - else if (0 == VG_(strncmp)(argv[i], "--I1=", 5)) - parse_cache_opt(&VG_(clo_I1_cache), argv[i], 5); - else if (0 == VG_(strncmp)(argv[i], "--D1=", 5)) - parse_cache_opt(&VG_(clo_D1_cache), argv[i], 5); - else if (0 == VG_(strncmp)(argv[i], "--L2=", 5)) - parse_cache_opt(&VG_(clo_L2_cache), argv[i], 5); - - else if (STREQ(argv[i], "--smc-check=none")) - VG_(clo_smc_check) = VG_CLO_SMC_NONE; - else if (STREQ(argv[i], "--smc-check=some")) - VG_(clo_smc_check) = VG_CLO_SMC_SOME; - else if (STREQ(argv[i], "--smc-check=all")) - VG_(clo_smc_check) = VG_CLO_SMC_ALL; - - else if (STREQ(argv[i], "--trace-syscalls=yes")) - VG_(clo_trace_syscalls) = True; - else if (STREQ(argv[i], "--trace-syscalls=no")) - VG_(clo_trace_syscalls) = False; - - 
else if (STREQ(argv[i], "--trace-signals=yes")) - VG_(clo_trace_signals) = True; - else if (STREQ(argv[i], "--trace-signals=no")) - VG_(clo_trace_signals) = False; - - else if (STREQ(argv[i], "--trace-symtab=yes")) - VG_(clo_trace_symtab) = True; - else if (STREQ(argv[i], "--trace-symtab=no")) - VG_(clo_trace_symtab) = False; - - else if (STREQ(argv[i], "--trace-malloc=yes")) - VG_(clo_trace_malloc) = True; - else if (STREQ(argv[i], "--trace-malloc=no")) - VG_(clo_trace_malloc) = False; - - else if (STREQ(argv[i], "--trace-sched=yes")) - VG_(clo_trace_sched) = True; - else if (STREQ(argv[i], "--trace-sched=no")) - VG_(clo_trace_sched) = False; - - else if (STREQ(argv[i], "--trace-pthread=none")) - VG_(clo_trace_pthread_level) = 0; - else if (STREQ(argv[i], "--trace-pthread=some")) - VG_(clo_trace_pthread_level) = 1; - else if (STREQ(argv[i], "--trace-pthread=all")) - VG_(clo_trace_pthread_level) = 2; - - else if (STREQN(14, argv[i], "--weird-hacks=")) - VG_(clo_weird_hacks) = &argv[i][14]; - - else if (STREQN(13, argv[i], "--stop-after=")) - VG_(clo_stop_after) = VG_(atoll)(&argv[i][13]); - - else if (STREQN(13, argv[i], "--dump-error=")) - VG_(clo_dump_error) = (Int)VG_(atoll)(&argv[i][13]); - - else if (STREQN(14, argv[i], "--num-callers=")) { - /* Make sure it's sane. */ - VG_(clo_backtrace_size) = (Int)VG_(atoll)(&argv[i][14]); - if (VG_(clo_backtrace_size) < 2) - VG_(clo_backtrace_size) = 2; - if (VG_(clo_backtrace_size) >= VG_DEEPEST_BACKTRACE) - VG_(clo_backtrace_size) = VG_DEEPEST_BACKTRACE; - } - - else - bad_option(argv[i]); - } - -# undef ISSPACE -# undef STREQ -# undef STREQN - - if (VG_(clo_verbosity < 0)) - VG_(clo_verbosity) = 0; - - if (VG_(clo_alignment) < 4 - || VG_(clo_alignment) > 4096 - || VG_(log2)( VG_(clo_alignment) ) == -1 /* not a power of 2 */) { - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "Invalid --alignment= setting. 
" - "Should be a power of 2, >= 4, <= 4096."); - bad_option("--alignment"); - } - - if (VG_(clo_GDB_attach) && VG_(clo_trace_children)) { - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "--gdb-attach=yes conflicts with --trace-children=yes"); - VG_(message)(Vg_UserMsg, - "Please choose one or the other, but not both."); - bad_option("--gdb-attach=yes and --trace-children=yes"); - } - - VG_(clo_logfile_fd) = eventually_logfile_fd; - - /* Don't do memory checking if simulating the cache. */ - if (VG_(clo_cachesim)) { - VG_(clo_instrument) = False; - } - - if (VG_(clo_verbosity > 0)) { - if (VG_(clo_cachesim)) { - VG_(message)(Vg_UserMsg, - "cachegrind-%s, an I1/D1/L2 cache profiler for x86 GNU/Linux.", - VERSION); - } else { - VG_(message)(Vg_UserMsg, - "valgrind-%s, a memory error detector for x86 GNU/Linux.", - VERSION); - } - } - - if (VG_(clo_verbosity > 0)) - VG_(message)(Vg_UserMsg, - "Copyright (C) 2000-2002, and GNU GPL'd, by Julian Seward."); - if (VG_(clo_verbosity) > 1) { - VG_(message)(Vg_UserMsg, "Startup, with flags:"); - for (i = 0; i < argc; i++) { - VG_(message)(Vg_UserMsg, " %s", argv[i]); - } - } - - if (VG_(clo_n_suppressions) == 0 && !VG_(clo_cachesim)) { - config_error("No error-suppression files were specified."); - } -} - - -/* --------------------------------------------------------------------- - Copying to/from m_state_static. 
- ------------------------------------------------------------------ */ - -UInt VG_(m_state_static) [8 /* int regs, in Intel order */ - + 1 /* %eflags */ - + 1 /* %eip */ - + VG_SIZE_OF_FPUSTATE_W /* FPU state */ - ]; - -void VG_(copy_baseBlock_to_m_state_static) ( void ) -{ - Int i; - VG_(m_state_static)[ 0/4] = VG_(baseBlock)[VGOFF_(m_eax)]; - VG_(m_state_static)[ 4/4] = VG_(baseBlock)[VGOFF_(m_ecx)]; - VG_(m_state_static)[ 8/4] = VG_(baseBlock)[VGOFF_(m_edx)]; - VG_(m_state_static)[12/4] = VG_(baseBlock)[VGOFF_(m_ebx)]; - VG_(m_state_static)[16/4] = VG_(baseBlock)[VGOFF_(m_esp)]; - VG_(m_state_static)[20/4] = VG_(baseBlock)[VGOFF_(m_ebp)]; - VG_(m_state_static)[24/4] = VG_(baseBlock)[VGOFF_(m_esi)]; - VG_(m_state_static)[28/4] = VG_(baseBlock)[VGOFF_(m_edi)]; - - VG_(m_state_static)[32/4] = VG_(baseBlock)[VGOFF_(m_eflags)]; - VG_(m_state_static)[36/4] = VG_(baseBlock)[VGOFF_(m_eip)]; - - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(m_state_static)[40/4 + i] - = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; -} - - -void VG_(copy_m_state_static_to_baseBlock) ( void ) -{ - Int i; - VG_(baseBlock)[VGOFF_(m_eax)] = VG_(m_state_static)[ 0/4]; - VG_(baseBlock)[VGOFF_(m_ecx)] = VG_(m_state_static)[ 4/4]; - VG_(baseBlock)[VGOFF_(m_edx)] = VG_(m_state_static)[ 8/4]; - VG_(baseBlock)[VGOFF_(m_ebx)] = VG_(m_state_static)[12/4]; - VG_(baseBlock)[VGOFF_(m_esp)] = VG_(m_state_static)[16/4]; - VG_(baseBlock)[VGOFF_(m_ebp)] = VG_(m_state_static)[20/4]; - VG_(baseBlock)[VGOFF_(m_esi)] = VG_(m_state_static)[24/4]; - VG_(baseBlock)[VGOFF_(m_edi)] = VG_(m_state_static)[28/4]; - - VG_(baseBlock)[VGOFF_(m_eflags)] = VG_(m_state_static)[32/4]; - VG_(baseBlock)[VGOFF_(m_eip)] = VG_(m_state_static)[36/4]; - - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(baseBlock)[VGOFF_(m_fpustate) + i] - = VG_(m_state_static)[40/4 + i]; -} - - -/* --------------------------------------------------------------------- - Show accumulated counts. 
- ------------------------------------------------------------------ */ - -static void vg_show_counts ( void ) -{ - VG_(message)(Vg_DebugMsg, - " lru: %d epochs, %d clearings.", - VG_(current_epoch), - VG_(number_of_lrus) ); - VG_(message)(Vg_DebugMsg, - "translate: new %d (%d -> %d), discard %d (%d -> %d).", - VG_(overall_in_count), - VG_(overall_in_osize), - VG_(overall_in_tsize), - VG_(overall_out_count), - VG_(overall_out_osize), - VG_(overall_out_tsize) ); - VG_(message)(Vg_DebugMsg, - " dispatch: %lu basic blocks, %d/%d sched events, %d tt_fast misses.", - VG_(bbs_done), VG_(num_scheduling_events_MAJOR), - VG_(num_scheduling_events_MINOR), - VG_(tt_fast_misses)); - VG_(message)(Vg_DebugMsg, - "reg-alloc: %d t-req-spill, " - "%d+%d orig+spill uis, %d total-reg-r.", - VG_(translations_needing_spill), - VG_(uinstrs_prealloc), - VG_(uinstrs_spill), - VG_(total_reg_rank) ); - VG_(message)(Vg_DebugMsg, - " sanity: %d cheap, %d expensive checks.", - VG_(sanity_fast_count), - VG_(sanity_slow_count) ); -} - - -/* --------------------------------------------------------------------- - Main! - ------------------------------------------------------------------ */ - -/* Where we jump to once Valgrind has got control, and the real - machine's state has been copied to the m_state_static. */ - -void VG_(main) ( void ) -{ - Int i; - VgSchedReturnCode src; - ThreadState* tst; - - /* Set up our stack sanity-check words. */ - for (i = 0; i < 10; i++) { - VG_(stack)[i] = (UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1; - VG_(stack)[10000-1-i] = (UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321; - } - - /* Set up baseBlock offsets and copy the saved machine's state into - it. */ - vg_init_baseBlock(); - VG_(copy_m_state_static_to_baseBlock)(); - - /* Process Valgrind's command-line opts (from env var VG_OPTS). */ - process_cmd_line_options(); - - /* Hook to delay things long enough so we can get the pid and - attach GDB in another shell. 
*/ - if (0) { - Int p, q; - for (p = 0; p < 50000; p++) - for (q = 0; q < 50000; q++) ; - } - - /* Initialise the scheduler, and copy the client's state from - baseBlock into VG_(threads)[1]. This has to come before signal - initialisations. */ - VG_(scheduler_init)(); - - /* Initialise the signal handling subsystem, temporarily parking - the saved blocking-mask in saved_sigmask. */ - VG_(sigstartup_actions)(); - - /* Perhaps we're profiling Valgrind? */ -# ifdef VG_PROFILE - VGP_(init_profiling)(); -# endif - - /* Start calibration of our RDTSC-based clock. */ - VG_(start_rdtsc_calibration)(); - - if (VG_(clo_instrument) || VG_(clo_cachesim)) { - VGP_PUSHCC(VgpInitAudit); - VGM_(init_memory_audit)(); - VGP_POPCC; - } - - VGP_PUSHCC(VgpReadSyms); - VG_(read_symbols)(); - VGP_POPCC; - - /* End calibration of our RDTSC-based clock, leaving it as long as - we can. */ - VG_(end_rdtsc_calibration)(); - - /* This should come after init_memory_audit; otherwise the latter - carefully sets up the permissions maps to cover the anonymous - mmaps for the translation table and translation cache, which - wastes > 20M of virtual address space. */ - VG_(init_tt_tc)(); - - if (VG_(clo_verbosity) == 1) { - VG_(message)(Vg_UserMsg, - "For more details, rerun with: -v"); - } - - /* Now it is safe for malloc et al in vg_clientmalloc.c to act - instrumented-ly. */ - VG_(running_on_simd_CPU) = True; - if (VG_(clo_instrument)) { - VGM_(make_readable) ( (Addr)&VG_(running_on_simd_CPU), 1 ); - VGM_(make_readable) ( (Addr)&VG_(clo_instrument), 1 ); - VGM_(make_readable) ( (Addr)&VG_(clo_trace_malloc), 1 ); - VGM_(make_readable) ( (Addr)&VG_(clo_sloppy_malloc), 1 ); - } - - if (VG_(clo_cachesim)) - VG_(init_cachesim)(); - - if (VG_(clo_verbosity) > 0) - VG_(message)(Vg_UserMsg, ""); - - VG_(bbs_to_go) = VG_(clo_stop_after); - - /* Run! 
*/ - VGP_PUSHCC(VgpSched); - src = VG_(scheduler)(); - VGP_POPCC; - - if (VG_(clo_verbosity) > 0) - VG_(message)(Vg_UserMsg, ""); - - if (src == VgSrc_Deadlock) { - VG_(message)(Vg_UserMsg, - "Warning: pthread scheduler exited due to deadlock"); - } - - if (VG_(clo_instrument)) { - VG_(show_all_errors)(); - VG_(clientmalloc_done)(); - if (VG_(clo_verbosity) == 1) { - VG_(message)(Vg_UserMsg, - "For counts of detected errors, rerun with: -v"); - } - if (VG_(clo_leak_check)) VG_(detect_memory_leaks)(); - } - VG_(running_on_simd_CPU) = False; - - if (VG_(clo_cachesim)) - VG_(do_cachesim_results)(VG_(client_argc), VG_(client_argv)); - - VG_(do_sanity_checks)( True /*include expensive checks*/ ); - - if (VG_(clo_verbosity) > 1) - vg_show_counts(); - - if (0) { - VG_(message)(Vg_DebugMsg, ""); - VG_(message)(Vg_DebugMsg, - "------ Valgrind's internal memory use stats follow ------" ); - VG_(mallocSanityCheckAll)(); - VG_(show_all_arena_stats)(); - VG_(message)(Vg_DebugMsg, - "------ Valgrind's ExeContext management stats follow ------" ); - VG_(show_ExeContext_stats)(); - VG_(message)(Vg_DebugMsg, - "------ Valgrind's client block stats follow ---------------" ); - VG_(show_client_block_stats)(); - } - -# ifdef VG_PROFILE - VGP_(done_profiling)(); -# endif - - VG_(done_prof_mem)(); - - VG_(shutdown_logging)(); - - /* Remove valgrind.so from a LD_PRELOAD=... string so child - processes don't get traced into. Also mess up $libdir/valgrind - so that our libpthread.so disappears from view. */ - if (!VG_(clo_trace_children)) { - VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH)( - VG_(getenv)("LD_PRELOAD"), - VG_(getenv)("LD_LIBRARY_PATH") - ); - } - - /* Decide how to exit. This depends on what the scheduler - returned. 
*/ - switch (src) { - case VgSrc_ExitSyscall: /* the normal way out */ - vg_assert(VG_(last_run_tid) > 0 - && VG_(last_run_tid) < VG_N_THREADS); - tst = & VG_(threads)[VG_(last_run_tid)]; - vg_assert(tst->status == VgTs_Runnable); - /* The thread's %EBX will hold the arg to exit(), so we just - do exit with that arg. */ - VG_(exit)( tst->m_ebx ); - /* NOT ALIVE HERE! */ - VG_(panic)("entered the afterlife in vg_main() -- ExitSyscall"); - break; /* what the hell :) */ - - case VgSrc_Deadlock: - /* Just exit now. No point in continuing. */ - VG_(exit)(0); - VG_(panic)("entered the afterlife in vg_main() -- Deadlock"); - break; - - case VgSrc_BbsDone: - /* Tricky; we have to try and switch back to the real CPU. - This is all very dodgy and won't work at all in the - presence of threads, or if the client happened to be - running a signal handler. */ - /* Prepare to restore state to the real CPU. */ - VG_(load_thread_state)(1 /* root thread */ ); - VG_(copy_baseBlock_to_m_state_static)(); - - /* This pushes a return address on the simulator's stack, - which is abandoned. We call vg_sigshutdown_actions() at - the end of vg_switch_to_real_CPU(), so as to ensure that - the original stack and machine state is restored before - the real signal mechanism is restored. */ - VG_(switch_to_real_CPU)(); - - default: - VG_(panic)("vg_main(): unexpected scheduler return code"); - } -} - - -/* Debugging thing .. can be called from assembly with OYNK macro. */ -void VG_(oynk) ( Int n ) -{ - OINK(n); -} - - -/* Find "valgrind.so" in a LD_PRELOAD=... string, and convert it to - "valgrinq.so", which doesn't do anything. This is used to avoid - tracing into child processes. To make this work the build system - also supplies a dummy file, "valgrinq.so". - - Also look for $(libdir)/lib/valgrind in LD_LIBRARY_PATH and change - it to $(libdir)/lib/valgrinq, so as to make our libpthread.so - disappear. 
-*/ -void VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) ( Char* ld_preload_str, - Char* ld_library_path_str ) -{ - Char* p_prel = NULL; - Char* p_path = NULL; - Int what = 0; - if (ld_preload_str == NULL || ld_library_path_str == NULL) - goto mutancy; - - /* VG_(printf)("%s %s\n", ld_preload_str, ld_library_path_str); */ - - p_prel = VG_(strstr)(ld_preload_str, "valgrind.so"); - p_path = VG_(strstr)(ld_library_path_str, VG_LIBDIR); - - if (p_prel == NULL) { - /* perhaps already happened? */ - what = 1; - if (VG_(strstr)(ld_preload_str, "valgrinq.so") == NULL) - goto mutancy; - if (VG_(strstr)(ld_library_path_str, "lib/valgrinq") == NULL) - goto mutancy; - return; - } - - what = 2; - if (p_path == NULL) goto mutancy; - - /* in LD_PRELOAD, turn valgrind.so into valgrinq.so. */ - what = 3; - if (p_prel[7] != 'd') goto mutancy; - p_prel[7] = 'q'; - - /* in LD_LIBRARY_PATH, turn $libdir/valgrind (as configure'd) from - .../lib/valgrind .../lib/valgrinq, which doesn't exist, - so that our own libpthread.so goes out of scope. */ - p_path += VG_(strlen)(VG_LIBDIR); - what = 4; - if (p_path[0] != '/') goto mutancy; - p_path++; /* step over / */ - what = 5; - if (p_path[7] != 'd') goto mutancy; - p_path[7] = 'q'; - return; - - mutancy: - VG_(printf)( - "\nVG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH): internal error:\n" - " what = %d\n" - " ld_preload_str = `%s'\n" - " ld_library_path_str = `%s'\n" - " p_prel = `%s'\n" - " p_path = `%s'\n" - " VG_LIBDIR = `%s'\n", - what, ld_preload_str, ld_library_path_str, - p_prel, p_path, VG_LIBDIR - ); - VG_(printf)( - "\n" - "Note that this is often caused by mis-installation of valgrind.\n" - "Correct installation procedure is:\n" - " ./configure --prefix=/install/dir\n" - " make install\n" - "And then use /install/dir/bin/valgrind\n" - "Moving the installation directory elsewhere after 'make install'\n" - "will cause the above error. 
Hand-editing the paths in the shell\n" - "scripts is also likely to cause problems.\n" - "\n" - ); - VG_(panic)("VG_(mash_LD_PRELOAD_and_LD_LIBRARY_PATH) failed\n"); -} - - -/* RUNS ON THE CLIENT'S STACK, but on the real CPU. Start GDB and get - it to attach to this process. Called if the user requests this - service after an error has been shown, so she can poke around and - look at parameters, memory, etc. You can't meaningfully get GDB to - continue the program, though; to continue, quit GDB. */ -extern void VG_(start_GDB_whilst_on_client_stack) ( void ) -{ - Int res; - UChar buf[100]; - VG_(sprintf)(buf, - "/usr/bin/gdb -nw /proc/%d/exe %d", - VG_(getpid)(), VG_(getpid)()); - VG_(message)(Vg_UserMsg, "starting GDB with cmd: %s", buf); - res = VG_(system)(buf); - if (res == 0) { - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "GDB has detached. Valgrind regains control. We continue."); - } else { - VG_(message)(Vg_UserMsg, "Apparently failed!"); - VG_(message)(Vg_UserMsg, ""); - } -} - - -/* Print some helpful-ish text about unimplemented things, and give - up. */ -void VG_(unimplemented) ( Char* msg ) -{ - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, - "Valgrind detected that your program requires"); - VG_(message)(Vg_UserMsg, - "the following unimplemented functionality:"); - VG_(message)(Vg_UserMsg, " %s", msg); - VG_(message)(Vg_UserMsg, - "This may be because the functionality is hard to implement,"); - VG_(message)(Vg_UserMsg, - "or because no reasonable program would behave this way,"); - VG_(message)(Vg_UserMsg, - "or because nobody has yet needed it. In any case, let me know"); - VG_(message)(Vg_UserMsg, - "(jseward@acm.org) and/or try to work around the problem, if you can."); - VG_(message)(Vg_UserMsg, - ""); - VG_(message)(Vg_UserMsg, - "Valgrind has to exit now. Sorry. 
Bye!"); - VG_(message)(Vg_UserMsg, - ""); - VG_(pp_sched_status)(); - VG_(exit)(1); -} - - -void VG_(nvidia_moan) ( void) -{ - VG_(message)(Vg_UserMsg, - "The following failure _might_ be caused by linking to NVidia's\n " - "libGL.so, so avoiding it, if you can, _might_ help you. For example,\n " - "re-build any Qt libraries you are using without OpenGL support."); -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_main.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_malloc2.c b/coregrind/vg_malloc2.c deleted file mode 100644 index 87f580d01a..0000000000 --- a/coregrind/vg_malloc2.c +++ /dev/null @@ -1,1299 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- An implementation of malloc/free which doesn't use sbrk. ---*/ -/*--- vg_malloc2.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - - -#include "vg_include.h" - -/* Define to turn on (heavyweight) debugging machinery. */ -/* #define DEBUG_MALLOC */ - - -/*------------------------------------------------------------*/ -/*--- Structs n stuff ---*/ -/*------------------------------------------------------------*/ - -#define VG_REDZONE_LO_MASK 0x31415927 -#define VG_REDZONE_HI_MASK 0x14141356 - -#define VG_N_MALLOC_LISTS 16 /* do not change this */ - - -typedef UInt Word; -typedef Word WordF; -typedef Word WordL; - - -/* A superblock. */ -typedef - struct _Superblock { - struct _Superblock* next; - /* number of payload words in this superblock. */ - Int n_payload_words; - Word payload_words[0]; - } - Superblock; - - -/* An arena. */ -typedef - struct { - Char* name; - Int rz_szW; /* Red zone size in words */ - Bool rz_check; /* Check red-zone on free? */ - Int min_sblockW; /* Minimum superblock size */ - WordF* freelist[VG_N_MALLOC_LISTS]; - Superblock* sblocks; - /* Stats only. */ - UInt bytes_on_loan; - UInt bytes_mmaped; - UInt bytes_on_loan_max; - } - Arena; - - -/* Block layout: - - this block total sizeW (1 word) - freelist previous ptr (1 word) - freelist next ptr (1 word) - red zone words (depends on .rz_szW field of Arena) - (payload words) - red zone words (depends on .rz_szW field of Arena) - this block total sizeW (1 word) - - Total size in words (bszW) and payload size in words (pszW) - are related by - bszW == pszW + 4 + 2 * a->rz_szW - - Furthermore, both size fields in the block are negative if it is - not in use, and positive if it is in use. A block size of zero - is not possible, because a block always has at least four words - of overhead. -*/ -typedef - struct { - Int bszW_lo; - Word* prev; - Word* next; - Word redzone[0]; - } - BlockHeader; - - -/*------------------------------------------------------------*/ -/*--- Forwardses ... and misc ... 
---*/ -/*------------------------------------------------------------*/ - -static Bool blockSane ( Arena* a, Word* b ); - -/* Align ptr p upwards to an align-sized boundary. */ -static -void* align_upwards ( void* p, Int align ) -{ - Addr a = (Addr)p; - if ((a % align) == 0) return (void*)a; - return (void*)(a - (a % align) + align); -} - - -/*------------------------------------------------------------*/ -/*--- Arena management stuff ---*/ -/*------------------------------------------------------------*/ - -/* The arena structures themselves. */ -static Arena vg_arena[VG_N_ARENAS]; - -/* Functions external to this module identify arenas using ArenaIds, - not Arena*s. This fn converts the former to the latter. */ -static Arena* arenaId_to_ArenaP ( ArenaId arena ) -{ - vg_assert(arena >= 0 && arena < VG_N_ARENAS); - return & vg_arena[arena]; -} - - -/* Initialise an arena. */ -static -void arena_init ( Arena* a, Char* name, - Int rz_szW, Bool rz_check, Int min_sblockW ) -{ - Int i; - vg_assert((min_sblockW % VKI_WORDS_PER_PAGE) == 0); - a->name = name; - a->rz_szW = rz_szW; - a->rz_check = rz_check; - a->min_sblockW = min_sblockW; - for (i = 0; i < VG_N_MALLOC_LISTS; i++) a->freelist[i] = NULL; - a->sblocks = NULL; - a->bytes_on_loan = 0; - a->bytes_mmaped = 0; - a->bytes_on_loan_max = 0; -} - - -/* Print vital stats for an arena. */ -void VG_(show_all_arena_stats) ( void ) -{ - Int i; - for (i = 0; i < VG_N_ARENAS; i++) { - VG_(message)(Vg_DebugMsg, - "Arena `%s': %7d max useful, %7d mmap'd, %7d current useful", - vg_arena[i].name, - vg_arena[i].bytes_on_loan_max, - vg_arena[i].bytes_mmaped, - vg_arena[i].bytes_on_loan - ); - } -} - - -/* It is important that this library is self-initialising, because it - may get called very early on -- as a result of C++ static - constructor initialisations -- before Valgrind itself is - initialised. Hence vg_malloc() and vg_free() below always call - ensure_mm_init() to ensure things are correctly initialised. 
*/ - -static -void ensure_mm_init ( void ) -{ - static Bool init_done = False; - if (init_done) return; - - /* Use a checked red zone size of 1 word for our internal stuff, - and an unchecked zone of arbitrary size for the client. Of - course the client's red zone is checked really, but using the - addressibility maps, not by the mechanism implemented here, - which merely checks at the time of freeing that the red zone - words are unchanged. */ - - arena_init ( &vg_arena[VG_AR_PRIVATE], "private ", - 1, True, 262144 ); - - arena_init ( &vg_arena[VG_AR_SYMTAB], "symtab ", - 1, True, 262144 ); - - arena_init ( &vg_arena[VG_AR_CLIENT], "client ", - VG_AR_CLIENT_REDZONE_SZW, False, 262144 ); - - arena_init ( &vg_arena[VG_AR_DEMANGLE], "demangle", - 4 /*paranoid*/, True, 16384 ); - - arena_init ( &vg_arena[VG_AR_EXECTXT], "exectxt ", - 1, True, 16384 ); - - arena_init ( &vg_arena[VG_AR_ERRCTXT], "errctxt ", - 1, True, 16384 ); - - arena_init ( &vg_arena[VG_AR_TRANSIENT], "transien", - 2, True, 16384 ); - - init_done = True; -# ifdef DEBUG_MALLOC - VG_(mallocSanityCheckAll)(); -# endif -} - - -/*------------------------------------------------------------*/ -/*--- Arena management stuff ---*/ -/*------------------------------------------------------------*/ - -static -Superblock* newSuperblock ( Arena* a, Int cszW ) -{ - Superblock* sb; - cszW += 2; /* Take into account sb->next and sb->n_words fields */ - if (cszW < a->min_sblockW) cszW = a->min_sblockW; - while ((cszW % VKI_WORDS_PER_PAGE) > 0) cszW++; - sb = VG_(get_memory_from_mmap) ( cszW * sizeof(Word), - "newSuperblock" ); - sb->n_payload_words = cszW - 2; - a->bytes_mmaped += cszW * sizeof(Word); - if (0) - VG_(message)(Vg_DebugMsg, "newSuperblock, %d payload words", - sb->n_payload_words); - return sb; -} - - -/* Find the superblock containing the given chunk. 
*/ -static -Superblock* findSb ( Arena* a, UInt* ch ) -{ - Superblock* sb; - for (sb = a->sblocks; sb; sb = sb->next) - if (&sb->payload_words[0] <= ch - && ch < &sb->payload_words[sb->n_payload_words]) - return sb; - VG_(printf)("findSb: can't find pointer %p in arena `%s'\n", - ch, a->name ); - VG_(panic)("findSb: vg_free() in wrong arena?"); - return NULL; /*NOTREACHED*/ -} - - -/*------------------------------------------------------------*/ -/*--- Low-level functions for working with blocks. ---*/ -/*------------------------------------------------------------*/ - -/* Add the not-in-use attribute to a bszW. */ -static __inline__ -Int mk_free_bszW ( Int bszW ) -{ - vg_assert(bszW != 0); - return (bszW < 0) ? bszW : -bszW; -} - -/* Add the in-use attribute to a bszW. */ -static __inline__ -Int mk_inuse_bszW ( Int bszW ) -{ - vg_assert(bszW != 0); - return (bszW < 0) ? -bszW : bszW; -} - -/* Remove the in-use/not-in-use attribute from a bszW, leaving just - the size. */ -static __inline__ -Int mk_plain_bszW ( Int bszW ) -{ - vg_assert(bszW != 0); - return (bszW < 0) ? -bszW : bszW; -} - -/* Does this bszW have the in-use attribute ? */ -static __inline__ -Bool is_inuse_bszW ( Int bszW ) -{ - vg_assert(bszW != 0); - return (bszW < 0) ? False : True; -} - - -/* Given the addr of the first word of a block, return the addr of the - last word. */ -static __inline__ -WordL* first_to_last ( WordF* fw ) -{ - return fw + mk_plain_bszW(fw[0]) - 1; -} - -/* Given the addr of the last word of a block, return the addr of the - first word. */ -static __inline__ -WordF* last_to_first ( WordL* lw ) -{ - return lw - mk_plain_bszW(lw[0]) + 1; -} - - -/* Given the addr of the first word of a block, return the addr of the - first word of its payload. */ -static __inline__ -Word* first_to_payload ( Arena* a, WordF* fw ) -{ - return & fw[3 + a->rz_szW]; -} - -/* Given the addr of the first word of a the payload of a block, - return the addr of the first word of the block. 
*/ -static __inline__ -Word* payload_to_first ( Arena* a, WordF* payload ) -{ - return & payload[- 3 - a->rz_szW]; -} - -/* Set and get the lower size field of a block. */ -static __inline__ -void set_bszW_lo ( WordF* fw, Int bszW ) { - fw[0] = bszW; -} -static __inline__ -Int get_bszW_lo ( WordF* fw ) -{ - return fw[0]; -} - - -/* Set and get the next and previous link fields of a block. */ -static __inline__ -void set_prev_p ( WordF* fw, Word* prev_p ) { - fw[1] = (Word)prev_p; -} -static __inline__ -void set_next_p ( WordF* fw, Word* next_p ) { - fw[2] = (Word)next_p; -} -static __inline__ -Word* get_prev_p ( WordF* fw ) { - return (Word*)(fw[1]); -} -static __inline__ -Word* get_next_p ( WordF* fw ) { - return (Word*)(fw[2]); -} - - -/* Set and get the upper size field of a block. */ -static __inline__ -void set_bszW_hi ( WordF* fw, Int bszW ) { - WordL* lw = first_to_last(fw); - vg_assert(lw == fw + mk_plain_bszW(bszW) - 1); - lw[0] = bszW; -} -static __inline__ -Int get_bszW_hi ( WordF* fw ) { - WordL* lw = first_to_last(fw); - return lw[0]; -} - -/* Get the upper size field of a block, given a pointer to the last - word of it. */ -static __inline__ -Int get_bszW_hi_from_last_word ( WordL* lw ) { - WordF* fw = last_to_first(lw); - return get_bszW_lo(fw); -} - - -/* Read and write the lower and upper red-zone words of a block. */ -static __inline__ -void set_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) -{ - fw[3 + rz_wordno] = w; -} -static __inline__ -void set_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno, Word w ) -{ - WordL* lw = first_to_last(fw); - lw[-1-rz_wordno] = w; -} -static __inline__ -Word get_rz_lo_word ( Arena* a, WordF* fw, Int rz_wordno ) -{ - return fw[3 + rz_wordno]; -} -static __inline__ -Word get_rz_hi_word ( Arena* a, WordF* fw, Int rz_wordno ) -{ - WordL* lw = first_to_last(fw); - return lw[-1-rz_wordno]; -} - - -/* Return the lower, upper and total overhead in words for a block. 
- These are determined purely by which arena the block lives in. */ -static __inline__ -Int overhead_szW_lo ( Arena* a ) -{ - return 3 + a->rz_szW; -} -static __inline__ -Int overhead_szW_hi ( Arena* a ) -{ - return 1 + a->rz_szW; -} -static __inline__ -Int overhead_szW ( Arena* a ) -{ - return overhead_szW_lo(a) + overhead_szW_hi(a); -} - - -/* Convert pointer size in words to block size in words, and back. */ -static __inline__ -Int pszW_to_bszW ( Arena* a, Int pszW ) -{ - vg_assert(pszW >= 0); - return pszW + overhead_szW(a); -} -static __inline__ -Int bszW_to_pszW ( Arena* a, Int bszW ) -{ - Int pszW = bszW - overhead_szW(a); - vg_assert(pszW >= 0); - return pszW; -} - -/*------------------------------------------------------------*/ -/*--- Functions for working with freelists. ---*/ -/*------------------------------------------------------------*/ - -/* Determination of which freelist a block lives on is based on the - payload size, not block size, in words. */ - -/* Convert a payload size in words to a freelist number. */ - -static -Int pszW_to_listNo ( Int pszW ) -{ - vg_assert(pszW >= 0); - if (pszW <= 3) return 0; - if (pszW <= 4) return 1; - if (pszW <= 5) return 2; - if (pszW <= 6) return 3; - if (pszW <= 7) return 4; - if (pszW <= 8) return 5; - if (pszW <= 9) return 6; - if (pszW <= 10) return 7; - if (pszW <= 11) return 8; - if (pszW <= 12) return 9; - if (pszW <= 16) return 10; - if (pszW <= 32) return 11; - if (pszW <= 64) return 12; - if (pszW <= 128) return 13; - if (pszW <= 256) return 14; - return 15; -} - - -/* What are the minimum and maximum payload sizes for a given list? 
*/ - -static -Int listNo_to_pszW_min ( Int listNo ) -{ - Int pszW = 0; - vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); - while (pszW_to_listNo(pszW) < listNo) pszW++; - return pszW; -} - -static -Int listNo_to_pszW_max ( Int listNo ) -{ - vg_assert(listNo >= 0 && listNo <= VG_N_MALLOC_LISTS); - if (listNo == VG_N_MALLOC_LISTS-1) { - return 999999999; - } else { - return listNo_to_pszW_min(listNo+1) - 1; - } -} - - -/* A nasty hack to try and reduce fragmentation. Try and replace - a->freelist[lno] with another block on the same list but with a - lower address, with the idea of attempting to recycle the same - blocks rather than cruise through the address space. */ - -static -void swizzle ( Arena* a, Int lno ) -{ - UInt* p_best; - UInt* pp; - UInt* pn; - Int i; - - p_best = a->freelist[lno]; - if (p_best == NULL) return; - - pn = pp = p_best; - for (i = 0; i < 20; i++) { - pn = get_next_p(pn); - pp = get_prev_p(pp); - if (pn < p_best) p_best = pn; - if (pp < p_best) p_best = pp; - } - if (p_best < a->freelist[lno]) { -# ifdef DEBUG_MALLOC - VG_(printf)("retreat by %d\n", - ((Char*)(a->freelist[lno])) - ((Char*)p_best)); -# endif - a->freelist[lno] = p_best; - } -} - - -/*------------------------------------------------------------*/ -/*--- Creating and deleting blocks. ---*/ -/*------------------------------------------------------------*/ - -/* Mark the words at b .. b+bszW-1 as not in use, and add them to the - relevant free list. */ - -static -void mkFreeBlock ( Arena* a, Word* b, Int bszW, Int b_lno ) -{ - Int pszW = bszW_to_pszW(a, bszW); - vg_assert(pszW >= 0); - vg_assert(b_lno == pszW_to_listNo(pszW)); - /* Set the size fields and indicate not-in-use. */ - set_bszW_lo(b, mk_free_bszW(bszW)); - set_bszW_hi(b, mk_free_bszW(bszW)); - - /* Add to the relevant list. 
*/ - if (a->freelist[b_lno] == NULL) { - set_prev_p(b, b); - set_next_p(b, b); - a->freelist[b_lno] = b; - } else { - Word* b_prev = get_prev_p(a->freelist[b_lno]); - Word* b_next = a->freelist[b_lno]; - set_next_p(b_prev, b); - set_prev_p(b_next, b); - set_next_p(b, b_next); - set_prev_p(b, b_prev); - } -# ifdef DEBUG_MALLOC - (void)blockSane(a,b); -# endif -} - - -/* Mark the words at b .. b+bszW-1 as in use, and set up the block - appropriately. */ -static -void mkInuseBlock ( Arena* a, UInt* b, UInt bszW ) -{ - Int i; - set_bszW_lo(b, mk_inuse_bszW(bszW)); - set_bszW_hi(b, mk_inuse_bszW(bszW)); - set_prev_p(b, NULL); - set_next_p(b, NULL); - if (a->rz_check) { - for (i = 0; i < a->rz_szW; i++) { - set_rz_lo_word(a, b, i, (UInt)b ^ VG_REDZONE_LO_MASK); - set_rz_hi_word(a, b, i, (UInt)b ^ VG_REDZONE_HI_MASK); - } - } -# ifdef DEBUG_MALLOC - (void)blockSane(a,b); -# endif -} - - -/* Remove a block from a given list. Does no sanity checking. */ -static -void unlinkBlock ( Arena* a, UInt* b, Int listno ) -{ - vg_assert(listno >= 0 && listno < VG_N_MALLOC_LISTS); - if (get_prev_p(b) == b) { - /* Only one element in the list; treat it specially. */ - vg_assert(get_next_p(b) == b); - a->freelist[listno] = NULL; - } else { - UInt* b_prev = get_prev_p(b); - UInt* b_next = get_next_p(b); - a->freelist[listno] = b_prev; - set_next_p(b_prev, b_next); - set_prev_p(b_next, b_prev); - swizzle ( a, listno ); - } - set_prev_p(b, NULL); - set_next_p(b, NULL); -} - - -/* Split an existing free block into two pieces, and put the fragment - (the second one along in memory) onto the relevant free list. - req_bszW is the required size of the block which isn't the - fragment. 
*/ -static -void splitChunk ( Arena* a, UInt* b, Int b_listno, UInt req_bszW ) -{ - Int b_bszW, frag_bszW; - b_bszW = mk_plain_bszW(get_bszW_lo(b)); - vg_assert(req_bszW < b_bszW); - frag_bszW = b_bszW - req_bszW; - vg_assert(frag_bszW >= overhead_szW(a)); - /* - printf( "split %d into %d and %d\n", - b_bszW,req_bszW,frag_bszW ); - */ - vg_assert(bszW_to_pszW(a, frag_bszW) > 0); - unlinkBlock(a, b, b_listno); - mkInuseBlock(a, b, req_bszW); - mkFreeBlock(a, &b[req_bszW], frag_bszW, - pszW_to_listNo(bszW_to_pszW(a, frag_bszW))); -} - - -/*------------------------------------------------------------*/ -/*--- Sanity-check/debugging machinery. ---*/ -/*------------------------------------------------------------*/ - -/* Do some crude sanity checks on a chunk. */ -static -Bool blockSane ( Arena* a, Word* b ) -{ -# define BLEAT(str) VG_(printf)("blockSane: fail -- %s\n",str) - Int i; - if (get_bszW_lo(b) != get_bszW_hi(b)) - {BLEAT("sizes");return False;} - if (a->rz_check && is_inuse_bszW(get_bszW_lo(b))) { - for (i = 0; i < a->rz_szW; i++) { - if (get_rz_lo_word(a, b, i) != ((Word)b ^ VG_REDZONE_LO_MASK)) - {BLEAT("redzone-lo");return False;} - if (get_rz_hi_word(a, b, i) != ((Word)b ^ VG_REDZONE_HI_MASK)) - {BLEAT("redzone-hi");return False;} - } - } - return True; -# undef BLEAT -} - - -/* Print superblocks (only for debugging). */ -static -void ppSuperblocks ( Arena* a ) -{ - Int i, ch_bszW, blockno; - UInt* ch; - Superblock* sb = a->sblocks; - blockno = 1; - - while (sb) { - VG_(printf)( "\n" ); - VG_(printf)( "superblock %d at %p, sb->n_pl_ws = %d, next = %p\n", - blockno++, sb, sb->n_payload_words, sb->next ); - i = 0; - while (True) { - if (i >= sb->n_payload_words) break; - ch = &sb->payload_words[i]; - ch_bszW = get_bszW_lo(ch); - VG_(printf)( " block at %d, bszW %d: ", i, mk_plain_bszW(ch_bszW) ); - VG_(printf)( "%s, ", is_inuse_bszW(ch_bszW) ? "inuse" : "free" ); - VG_(printf)( "%s\n", blockSane(a,ch) ? 
"ok" : "BAD" ); - i += mk_plain_bszW(ch_bszW); - } - if (i > sb->n_payload_words) - VG_(printf)( " last block overshoots end of SB\n"); - sb = sb->next; - } - VG_(printf)( "end of superblocks\n\n" ); -} - - -/* Sanity check both the superblocks and the chains. */ -void VG_(mallocSanityCheckArena) ( ArenaId aid ) -{ - Int i, superblockctr, b_bszW, b_pszW, blockctr_sb, blockctr_li; - Int blockctr_sb_free, listno, list_min_pszW, list_max_pszW; - Superblock* sb; - Bool thisFree, lastWasFree; - Word* b; - Word* b_prev; - UInt arena_bytes_on_loan; - Arena* a; - -# define BOMB VG_(panic)("vg_mallocSanityCheckArena") - - a = arenaId_to_ArenaP(aid); - - /* First, traverse all the superblocks, inspecting the chunks in - each. */ - superblockctr = blockctr_sb = blockctr_sb_free = 0; - arena_bytes_on_loan = 0; - sb = a->sblocks; - while (sb) { - lastWasFree = False; - superblockctr++; - i = 0; - while (True) { - if (i >= sb->n_payload_words) break; - blockctr_sb++; - b = &sb->payload_words[i]; - b_bszW = get_bszW_lo(b); - if (!blockSane(a, b)) { - VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " - "BAD\n", - sb, i, b_bszW ); - BOMB; - } - thisFree = !is_inuse_bszW(b_bszW); - if (thisFree && lastWasFree) { - VG_(printf)( "mallocSanityCheck: sb %p, block %d (bszW %d): " - "UNMERGED FREES\n", - sb, i, b_bszW ); - BOMB; - } - lastWasFree = thisFree; - if (thisFree) blockctr_sb_free++; - if (!thisFree) - arena_bytes_on_loan += sizeof(Word) * bszW_to_pszW(a, b_bszW); - i += mk_plain_bszW(b_bszW); - } - if (i > sb->n_payload_words) { - VG_(printf)( "mallocSanityCheck: sb %p: last block " - "overshoots end\n", sb); - BOMB; - } - sb = sb->next; - } - - if (arena_bytes_on_loan != a->bytes_on_loan) { - VG_(printf)( - "mallocSanityCheck: a->bytes_on_loan %d, " - "arena_bytes_on_loan %d: " - "MISMATCH\n", a->bytes_on_loan, arena_bytes_on_loan); - ppSuperblocks(a); - BOMB; - } - - /* Second, traverse each list, checking that the back pointers make - sense, counting blocks 
encountered, and checking that each block - is an appropriate size for this list. */ - blockctr_li = 0; - for (listno = 0; listno < VG_N_MALLOC_LISTS; listno++) { - list_min_pszW = listNo_to_pszW_min(listno); - list_max_pszW = listNo_to_pszW_max(listno); - b = a->freelist[listno]; - if (b == NULL) continue; - while (True) { - b_prev = b; - b = get_next_p(b); - if (get_prev_p(b) != b_prev) { - VG_(printf)( "mallocSanityCheck: list %d at %p: " - "BAD LINKAGE\n", - listno, b ); - BOMB; - } - b_pszW = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); - if (b_pszW < list_min_pszW || b_pszW > list_max_pszW) { - VG_(printf)( - "mallocSanityCheck: list %d at %p: " - "WRONG CHAIN SIZE %d (%d, %d)\n", - listno, b, b_pszW, list_min_pszW, list_max_pszW ); - BOMB; - } - blockctr_li++; - if (b == a->freelist[listno]) break; - } - } - - if (blockctr_sb_free != blockctr_li) { - VG_(printf)( - "mallocSanityCheck: BLOCK COUNT MISMATCH " - "(via sbs %d, via lists %d)\n", - blockctr_sb_free, blockctr_li ); - ppSuperblocks(a); - BOMB; - } - - VG_(message)(Vg_DebugMsg, - "mSC [%s]: %2d sbs, %5d tot bs, %4d/%-4d free bs, " - "%2d lists, %7d mmap, %7d loan", - a->name, - superblockctr, - blockctr_sb, blockctr_sb_free, blockctr_li, - VG_N_MALLOC_LISTS, - a->bytes_mmaped, a->bytes_on_loan); -# undef BOMB -} - - -void VG_(mallocSanityCheckAll) ( void ) -{ - Int i; - for (i = 0; i < VG_N_ARENAS; i++) - VG_(mallocSanityCheckArena) ( i ); -} - - -/* Really, this isn't the right place for this. Nevertheless: find - out if an arena is empty -- currently has no bytes on loan. This - is useful for checking for memory leaks (of valgrind, not the - client.) -*/ -Bool VG_(is_empty_arena) ( ArenaId aid ) -{ - Arena* a; - Superblock* sb; - WordF* b; - Int b_bszW; - ensure_mm_init(); - a = arenaId_to_ArenaP(aid); - for (sb = a->sblocks; sb != NULL; sb = sb->next) { - /* If the superblock is empty, it should contain a single free - block, of the right size. 
*/ - b = &(sb->payload_words[0]); - b_bszW = get_bszW_lo(b); - if (is_inuse_bszW(b_bszW)) return False; - if (mk_plain_bszW(b_bszW) != sb->n_payload_words) return False; - /* So this block is not in use and is of the right size. Keep - going. */ - } - return True; -} - - -/*------------------------------------------------------------*/ -/*--- Externally-visible functions. ---*/ -/*------------------------------------------------------------*/ - -void* VG_(malloc) ( ArenaId aid, Int req_pszB ) -{ - Int req_pszW, req_bszW, frag_bszW, b_bszW, lno; - Superblock* new_sb; - Word* b; - Arena* a; - - VGP_PUSHCC(VgpMalloc); - - ensure_mm_init(); - a = arenaId_to_ArenaP(aid); - - vg_assert(req_pszB >= 0); - vg_assert(req_pszB < 0x7FFFFFF0); - - req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; - - /* Keep gcc -O happy: */ - b = NULL; - - /* Start searching at this list. */ - lno = pszW_to_listNo(req_pszW); - - /* This loop finds a list which has a block big enough, or sets - req_listno to N_LISTS if no such block exists. */ - while (True) { - if (lno == VG_N_MALLOC_LISTS) break; - /* If this list is empty, try the next one. */ - if (a->freelist[lno] == NULL) { - lno++; - continue; - } - /* Scan a->list[lno] to find a big-enough chunk. */ - b = a->freelist[lno]; - b_bszW = mk_plain_bszW(get_bszW_lo(b)); - while (True) { - if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; - b = get_next_p(b); - b_bszW = mk_plain_bszW(get_bszW_lo(b)); - if (b == a->freelist[lno]) break; - } - if (bszW_to_pszW(a, b_bszW) >= req_pszW) break; - /* No luck? Try a larger list. */ - lno++; - } - - /* Either lno < VG_N_MALLOC_LISTS and b points to the selected - block, or lno == VG_N_MALLOC_LISTS, and we have to allocate a - new superblock. 
*/ - - if (lno == VG_N_MALLOC_LISTS) { - req_bszW = pszW_to_bszW(a, req_pszW); - new_sb = newSuperblock(a, req_bszW); - vg_assert(new_sb != NULL); - new_sb->next = a->sblocks; - a->sblocks = new_sb; - b = &(new_sb->payload_words[0]); - lno = pszW_to_listNo(bszW_to_pszW(a, new_sb->n_payload_words)); - mkFreeBlock ( a, b, new_sb->n_payload_words, lno); - } - - /* Ok, we can allocate from b, which lives in list req_listno. */ - vg_assert(b != NULL); - vg_assert(lno >= 0 && lno < VG_N_MALLOC_LISTS); - vg_assert(a->freelist[lno] != NULL); - b_bszW = mk_plain_bszW(get_bszW_lo(b)); - req_bszW = pszW_to_bszW(a, req_pszW); - /* req_bszW is the size of the block we are after. b_bszW is the - size of what we've actually got. */ - vg_assert(b_bszW >= req_bszW); - - /* Could we split this block and still get a useful fragment? - Where "useful" means that the payload size of the frag is at - least one word. */ - frag_bszW = b_bszW - req_bszW; - if (frag_bszW > overhead_szW(a)) { - splitChunk(a, b, lno, req_bszW); - } else { - /* No, mark as in use and use as-is. 
*/ - unlinkBlock(a, b, lno); - /* - set_bszW_lo(b, mk_inuse_bszW(b_bszW)); - set_bszW_hi(b, mk_inuse_bszW(b_bszW)); - */ - mkInuseBlock(a, b, b_bszW); - } - vg_assert(req_bszW <= mk_plain_bszW(get_bszW_lo(b))); - - a->bytes_on_loan - += sizeof(Word) - * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(b))); - if (a->bytes_on_loan > a->bytes_on_loan_max) - a->bytes_on_loan_max = a->bytes_on_loan; - -# ifdef DEBUG_MALLOC - VG_(mallocSanityCheckArena)(aid); -# endif - - VGP_POPCC; - return first_to_payload(a, b); -} - - -void VG_(free) ( ArenaId aid, void* ptr ) -{ - Superblock* sb; - UInt* sb_payl_firstw; - UInt* sb_payl_lastw; - UInt* other; - UInt* ch; - Int ch_bszW, ch_pszW, other_bszW, ch_listno; - Arena* a; - - VGP_PUSHCC(VgpMalloc); - - ensure_mm_init(); - a = arenaId_to_ArenaP(aid); - - if (ptr == NULL) return; - - ch = payload_to_first(a, ptr); - -# ifdef DEBUG_MALLOC - vg_assert(blockSane(a,ch)); -# endif - - a->bytes_on_loan - -= sizeof(Word) - * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(ch))); - - sb = findSb( a, ch ); - sb_payl_firstw = &(sb->payload_words[0]); - sb_payl_lastw = &(sb->payload_words[sb->n_payload_words-1]); - - /* Put this chunk back on a list somewhere. */ - ch_bszW = get_bszW_lo(ch); - ch_pszW = bszW_to_pszW(a, ch_bszW); - ch_listno = pszW_to_listNo(ch_pszW); - mkFreeBlock( a, ch, ch_bszW, ch_listno ); - - /* See if this block can be merged with the following one. */ - other = ch + ch_bszW; - /* overhead_szW(a) is the smallest possible bszW for this arena. - So the nearest possible end to the block beginning at other is - other+overhead_szW(a)-1. Hence the test below. 
*/ - if (other+overhead_szW(a)-1 <= sb_payl_lastw) { - other_bszW = get_bszW_lo(other); - if (!is_inuse_bszW(other_bszW)) { - /* VG_(printf)( "merge-successor\n"); */ - other_bszW = mk_plain_bszW(other_bszW); -# ifdef DEBUG_MALLOC - vg_assert(blockSane(a, other)); -# endif - unlinkBlock( a, ch, ch_listno ); - unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a,other_bszW)) ); - ch_bszW += other_bszW; - ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); - mkFreeBlock( a, ch, ch_bszW, ch_listno ); - } - } - - /* See if this block can be merged with its predecessor. */ - if (ch-overhead_szW(a) >= sb_payl_firstw) { - other_bszW = get_bszW_hi_from_last_word( ch-1 ); - if (!is_inuse_bszW(other_bszW)) { - /* VG_(printf)( "merge-predecessor\n"); */ - other = last_to_first( ch-1 ); - other_bszW = mk_plain_bszW(other_bszW); - unlinkBlock( a, ch, ch_listno ); - unlinkBlock( a, other, pszW_to_listNo(bszW_to_pszW(a, other_bszW)) ); - ch = other; - ch_bszW += other_bszW; - ch_listno = pszW_to_listNo(bszW_to_pszW(a, ch_bszW)); - mkFreeBlock( a, ch, ch_bszW, ch_listno ); - } - } - -# ifdef DEBUG_MALLOC - VG_(mallocSanityCheckArena)(aid); -# endif - - VGP_POPCC; -} - - -/* - The idea for malloc_aligned() is to allocate a big block, base, and - then split it into two parts: frag, which is returned to the the - free pool, and align, which is the bit we're really after. Here's - a picture. L and H denote the block lower and upper overheads, in - words. The details are gruesome. Note it is slightly complicated - because the initial request to generate base may return a bigger - block than we asked for, so it is important to distinguish the base - request size and the base actual size. - - frag_b align_b - | | - | frag_p | align_p - | | | | - v v v v - - +---+ +---+---+ +---+ - | L |----------------| H | L |---------------| H | - +---+ +---+---+ +---+ - - ^ ^ ^ - | | : - | base_p this addr must be aligned - | - base_b - - . . . . . . . - <------ frag_bszW -------> . . . - . 
<------------- base_pszW_act -----------> . - . . . . . . . - -*/ -void* VG_(malloc_aligned) ( ArenaId aid, Int req_alignB, Int req_pszB ) -{ - Int req_alignW, req_pszW, base_pszW_req, base_pszW_act, frag_bszW; - Word *base_b, *base_p, *align_p; - UInt saved_bytes_on_loan; - Arena* a; - - ensure_mm_init(); - a = arenaId_to_ArenaP(aid); - - vg_assert(req_pszB >= 0); - vg_assert(req_pszB < 0x7FFFFFF0); - - /* Check that the requested alignment seems reasonable; that is, is - a power of 2. */ - switch (req_alignB) { - case 4: - case 8: case 16: case 32: case 64: case 128: case 256: - case 512: case 1024: case 2048: case 4096: case 8192: - case 16384: case 32768: case 65536: case 131072: - case 262144: - case 1048576: - /* can't be bothered to calculate larger ones */ - break; - default: - VG_(printf)("vg_malloc_aligned(%p, %d, %d)\nbad alignment request", - a, req_pszB, req_alignB ); - VG_(panic)("vg_malloc_aligned"); - /*NOTREACHED*/ - } - - /* Required alignment, in words. Since it's constrained to be a - power of 2 >= word size, no need to align the alignment. Still, - we check. */ - req_alignW = req_alignB / VKI_BYTES_PER_WORD; - vg_assert(req_alignB == req_alignW * VKI_BYTES_PER_WORD); - - /* Required payload size for the aligned chunk. */ - req_pszW = (req_pszB + VKI_BYTES_PER_WORD - 1) / VKI_BYTES_PER_WORD; - - /* Payload size to request for the big block that we will split - up. */ - base_pszW_req = req_pszW + overhead_szW(a) + req_alignW; - - /* Payload ptr for the block we are going to split. Note this - changes a->bytes_on_loan; we save and restore it ourselves. */ - saved_bytes_on_loan = a->bytes_on_loan; - base_p = VG_(malloc) ( aid, base_pszW_req * VKI_BYTES_PER_WORD ); - a->bytes_on_loan = saved_bytes_on_loan; - - /* Block ptr for the block we are going to split. */ - base_b = payload_to_first ( a, base_p ); - - /* Pointer to the payload of the aligned block we are going to - return. This has to be suitably aligned. 
*/ - align_p = align_upwards ( base_b + 2 * overhead_szW_lo(a) - + overhead_szW_hi(a), - req_alignB ); - - /* The block size of the fragment we will create. This must be big - enough to actually create a fragment. */ - frag_bszW = align_p - overhead_szW_lo(a) - base_b; - vg_assert(frag_bszW >= overhead_szW(a)); - - /* The actual payload size of the block we are going to split. */ - base_pszW_act = bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo(base_b))); - - /* Create the fragment block, and put it back on the relevant free - list. */ - mkFreeBlock ( a, base_b, frag_bszW, - pszW_to_listNo(bszW_to_pszW(a, frag_bszW)) ); - - /* Create the aligned block. */ - mkInuseBlock ( a, - align_p - overhead_szW_lo(a), - base_p + base_pszW_act - + overhead_szW_hi(a) - - (align_p - overhead_szW_lo(a)) ); - - /* Final sanity checks. */ - vg_assert(( (UInt)align_p % req_alignB) == 0); - - vg_assert(is_inuse_bszW(get_bszW_lo(payload_to_first(a, align_p)))); - - vg_assert(req_pszW - <= - bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( - payload_to_first(a, align_p)))) - ); - - a->bytes_on_loan - += sizeof(Word) - * bszW_to_pszW(a, mk_plain_bszW(get_bszW_lo( - payload_to_first(a, align_p)))); - if (a->bytes_on_loan > a->bytes_on_loan_max) - a->bytes_on_loan_max = a->bytes_on_loan; - -# ifdef DEBUG_MALLOC - VG_(mallocSanityCheckArena)(aid); -# endif - - return align_p; -} - - -/*------------------------------------------------------------*/ -/*--- Services layered on top of malloc/free. 
---*/ -/*------------------------------------------------------------*/ - -void* VG_(calloc) ( ArenaId aid, Int nmemb, Int nbytes ) -{ - Int i, size; - UChar* p; - size = nmemb * nbytes; - vg_assert(size >= 0); - p = VG_(malloc) ( aid, size ); - for (i = 0; i < size; i++) p[i] = 0; - return p; -} - - -void* VG_(realloc) ( ArenaId aid, void* ptr, Int req_pszB ) -{ - Arena* a; - Int old_bszW, old_pszW, old_pszB, i; - UChar *p_old, *p_new; - UInt* ch; - - ensure_mm_init(); - a = arenaId_to_ArenaP(aid); - - vg_assert(req_pszB >= 0); - vg_assert(req_pszB < 0x7FFFFFF0); - - ch = payload_to_first(a, ptr); - vg_assert(blockSane(a, ch)); - - old_bszW = get_bszW_lo(ch); - vg_assert(is_inuse_bszW(old_bszW)); - old_bszW = mk_plain_bszW(old_bszW); - old_pszW = bszW_to_pszW(a, old_bszW); - old_pszB = old_pszW * VKI_BYTES_PER_WORD; - - if (req_pszB <= old_pszB) return ptr; - - p_new = VG_(malloc) ( aid, req_pszB ); - p_old = (UChar*)ptr; - for (i = 0; i < old_pszB; i++) - p_new[i] = p_old[i]; - - VG_(free)(aid, p_old); - return p_new; -} - - -/*------------------------------------------------------------*/ -/*--- The original test driver machinery. 
---*/ -/*------------------------------------------------------------*/ - -#if 0 - -#if 1 -#define N_TEST_TRANSACTIONS 100000000 -#define N_TEST_ARR 200000 -#define M_TEST_MALLOC 1000 -#else -#define N_TEST_TRANSACTIONS 500000 -#define N_TEST_ARR 30000 -#define M_TEST_MALLOC 500 -#endif - - -void* test_arr[N_TEST_ARR]; - -int main ( int argc, char** argv ) -{ - Int i, j, k, nbytes, qq; - unsigned char* chp; - Arena* a = &arena[VG_AR_PRIVATE]; - srandom(1); - for (i = 0; i < N_TEST_ARR; i++) - test_arr[i] = NULL; - - for (i = 0; i < N_TEST_TRANSACTIONS; i++) { - if (i % 50000 == 0) mallocSanityCheck(a); - j = random() % N_TEST_ARR; - if (test_arr[j]) { - vg_free(a, test_arr[j]); - test_arr[j] = NULL; - } else { - nbytes = 1 + random() % M_TEST_MALLOC; - qq = random()%64; - if (qq == 32) - nbytes *= 17; - else if (qq == 33) - nbytes = 0; - test_arr[j] - = (i % 17) == 0 - ? vg_memalign(a, nbytes, 1<< (3+(random()%10))) - : vg_malloc( a, nbytes ); - chp = test_arr[j]; - for (k = 0; k < nbytes; k++) - chp[k] = (unsigned char)(k + 99); - } - } - - - for (i = 0; i < N_TEST_ARR; i++) { - if (test_arr[i]) { - vg_free(a, test_arr[i]); - test_arr[i] = NULL; - } - } - mallocSanityCheck(a); - - fprintf(stderr, "ALL DONE\n"); - - show_arena_stats(a); - fprintf(stderr, "%d max useful, %d bytes mmap'd (%4.1f%%), %d useful\n", - a->bytes_on_loan_max, - a->bytes_mmaped, - 100.0 * (double)a->bytes_on_loan_max / (double)a->bytes_mmaped, - a->bytes_on_loan ); - - return 0; -} -#endif /* 0 */ - - -/*--------------------------------------------------------------------*/ -/*--- end vg_malloc2.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_memory.c b/coregrind/vg_memory.c deleted file mode 100644 index eea79cbe9a..0000000000 --- a/coregrind/vg_memory.c +++ /dev/null @@ -1,2414 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Maintain bitmaps of memory, tracking the accessibility (A) ---*/ 
-/*--- and validity (V) status of each byte. ---*/ -/*--- vg_memory.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - -/* Define to debug the mem audit system. */ -/* #define VG_DEBUG_MEMORY */ - -/* Define to debug the memory-leak-detector. */ -/* #define VG_DEBUG_LEAKCHECK */ - -/* Define to collect detailed performance info. */ -/* #define VG_PROFILE_MEMORY */ - - -/*------------------------------------------------------------*/ -/*--- Low-level support for memory checking. ---*/ -/*------------------------------------------------------------*/ - -/* - All reads and writes are checked against a memory map, which - records the state of all memory in the process. The memory map is - organised like this: - - The top 16 bits of an address are used to index into a top-level - map table, containing 65536 entries. 
Each entry is a pointer to a - second-level map, which records the accesibililty and validity - permissions for the 65536 bytes indexed by the lower 16 bits of the - address. Each byte is represented by nine bits, one indicating - accessibility, the other eight validity. So each second-level map - contains 73728 bytes. This two-level arrangement conveniently - divides the 4G address space into 64k lumps, each size 64k bytes. - - All entries in the primary (top-level) map must point to a valid - secondary (second-level) map. Since most of the 4G of address - space will not be in use -- ie, not mapped at all -- there is a - distinguished secondary map, which indicates `not addressible and - not valid' writeable for all bytes. Entries in the primary map for - which the entire 64k is not in use at all point at this - distinguished map. - - [...] lots of stuff deleted due to out of date-ness - - As a final optimisation, the alignment and address checks for - 4-byte loads and stores are combined in a neat way. The primary - map is extended to have 262144 entries (2^18), rather than 2^16. - The top 3/4 of these entries are permanently set to the - distinguished secondary map. For a 4-byte load/store, the - top-level map is indexed not with (addr >> 16) but instead f(addr), - where - - f( XXXX XXXX XXXX XXXX ____ ____ ____ __YZ ) - = ____ ____ ____ __YZ XXXX XXXX XXXX XXXX or - = ____ ____ ____ __ZY XXXX XXXX XXXX XXXX - - ie the lowest two bits are placed above the 16 high address bits. - If either of these two bits are nonzero, the address is misaligned; - this will select a secondary map from the upper 3/4 of the primary - map. Because this is always the distinguished secondary map, a - (bogus) address check failure will result. The failure handling - code can then figure out whether this is a genuine addr check - failure or whether it is a possibly-legitimate access at a - misaligned address. 
-*/ - - -/*------------------------------------------------------------*/ -/*--- Crude profiling machinery. ---*/ -/*------------------------------------------------------------*/ - -#ifdef VG_PROFILE_MEMORY - -#define N_PROF_EVENTS 150 - -static UInt event_ctr[N_PROF_EVENTS]; - -static void init_prof_mem ( void ) -{ - Int i; - for (i = 0; i < N_PROF_EVENTS; i++) - event_ctr[i] = 0; -} - -void VG_(done_prof_mem) ( void ) -{ - Int i; - for (i = 0; i < N_PROF_EVENTS; i++) { - if ((i % 10) == 0) - VG_(printf)("\n"); - if (event_ctr[i] > 0) - VG_(printf)( "prof mem event %2d: %d\n", i, event_ctr[i] ); - } - VG_(printf)("\n"); -} - -#define PROF_EVENT(ev) \ - do { vg_assert((ev) >= 0 && (ev) < N_PROF_EVENTS); \ - event_ctr[ev]++; \ - } while (False); - -#else - -static void init_prof_mem ( void ) { } - void VG_(done_prof_mem) ( void ) { } - -#define PROF_EVENT(ev) /* */ - -#endif - -/* Event index. If just the name of the fn is given, this means the - number of calls to the fn. Otherwise it is the specified event. 
- - 10 alloc_secondary_map - - 20 get_abit - 21 get_vbyte - 22 set_abit - 23 set_vbyte - 24 get_abits4_ALIGNED - 25 get_vbytes4_ALIGNED - - 30 set_address_range_perms - 31 set_address_range_perms(lower byte loop) - 32 set_address_range_perms(quadword loop) - 33 set_address_range_perms(upper byte loop) - - 35 make_noaccess - 36 make_writable - 37 make_readable - 38 make_readwritable - - 40 copy_address_range_perms - 41 copy_address_range_perms(byte loop) - 42 check_writable - 43 check_writable(byte loop) - 44 check_readable - 45 check_readable(byte loop) - 46 check_readable_asciiz - 47 check_readable_asciiz(byte loop) - - 50 make_aligned_word_NOACCESS - 51 make_aligned_word_WRITABLE - - 60 helperc_LOADV4 - 61 helperc_STOREV4 - 62 helperc_LOADV2 - 63 helperc_STOREV2 - 64 helperc_LOADV1 - 65 helperc_STOREV1 - - 70 vgm_rd_V4_SLOWLY - 71 vgm_wr_V4_SLOWLY - 72 vgm_rd_V2_SLOWLY - 73 vgm_wr_V2_SLOWLY - 74 vgm_rd_V1_SLOWLY - 75 vgm_wr_V1_SLOWLY - - 80 fpu_read - 81 fpu_read aligned 4 - 82 fpu_read aligned 8 - 83 fpu_read 2 - 84 fpu_read 10 (currently also counts size 28) - - 85 fpu_write - 86 fpu_write aligned 4 - 87 fpu_write aligned 8 - 88 fpu_write 2 - 89 fpu_write 10 (currently also counts size 28) - - 90 fpu_read_check_SLOWLY - 91 fpu_read_check_SLOWLY(byte loop) - 92 fpu_write_check_SLOWLY - 93 fpu_write_check_SLOWLY(byte loop) - - 100 is_plausible_stack_addr - 101 handle_esp_assignment - 102 handle_esp_assignment(-4) - 103 handle_esp_assignment(+4) - 104 handle_esp_assignment(-12) - 105 handle_esp_assignment(-8) - 106 handle_esp_assignment(+16) - 107 handle_esp_assignment(+12) - 108 handle_esp_assignment(0) - 109 handle_esp_assignment(+8) - 110 handle_esp_assignment(-16) - 111 handle_esp_assignment(+20) - 112 handle_esp_assignment(-20) - 113 handle_esp_assignment(+24) - 114 handle_esp_assignment(-24) - - 120 vg_handle_esp_assignment_SLOWLY - 121 vg_handle_esp_assignment_SLOWLY(normal; move down) - 122 vg_handle_esp_assignment_SLOWLY(normal; move up) - 123 vg_handle_esp_assignment_SLOWLY(normal; %esp unchanged) - 124 vg_handle_esp_assignment_SLOWLY(>= HUGE_DELTA) -*/ - 
-/*------------------------------------------------------------*/ -/*--- Function declarations. ---*/ -/*------------------------------------------------------------*/ - -/* Set permissions for an address range. Not speed-critical. */ -void VGM_(make_noaccess) ( Addr a, UInt len ); -void VGM_(make_writable) ( Addr a, UInt len ); -void VGM_(make_readable) ( Addr a, UInt len ); - -/* Check permissions for an address range. Not speed-critical. - On failure the offending address is stored in *bad_addr, provided - bad_addr is non-NULL. */ -Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ); -Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ); -Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ); - -/* Slow, general-case fallbacks for the fast-path helpers later in - this file; declared here because they are called before they are - defined. */ -static UInt vgm_rd_V4_SLOWLY ( Addr a ); -static UInt vgm_rd_V2_SLOWLY ( Addr a ); -static UInt vgm_rd_V1_SLOWLY ( Addr a ); -static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ); -static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ); -static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ); -static void fpu_read_check_SLOWLY ( Addr addr, Int size ); -static void fpu_write_check_SLOWLY ( Addr addr, Int size ); - - -/*------------------------------------------------------------*/ -/*--- Data defns. ---*/ -/*------------------------------------------------------------*/ - -/* A secondary map: the shadow state for the 65536 bytes that share - the same top 16 address bits. abits holds one addressability bit - per byte (65536/8 = 8192 bytes; a set bit means invalid); vbyte - holds one validity byte per byte. */ -typedef - struct { - UChar abits[8192]; - UChar vbyte[65536]; - } - SecMap; - -/* These two are statically allocated. Should they be non-public? 
*/ -SecMap* VG_(primary_map)[ /*65536*/ 262144 ]; -static SecMap vg_distinguished_secondary_map; - -#define IS_DISTINGUISHED_SM(smap) \ - ((smap) == &vg_distinguished_secondary_map) - -#define ENSURE_MAPPABLE(addr,caller) \ - do { \ - if (IS_DISTINGUISHED_SM(VG_(primary_map)[(addr) >> 16])) { \ - VG_(primary_map)[(addr) >> 16] = alloc_secondary_map(caller); \ - /* VG_(printf)("new 2map because of %p\n", addr); */ \ - } \ - } while(0) - -#define BITARR_SET(aaa_p,iii_p) \ - do { \ - UInt iii = (UInt)iii_p; \ - UChar* aaa = (UChar*)aaa_p; \ - aaa[iii >> 3] |= (1 << (iii & 7)); \ - } while (0) - -#define BITARR_CLEAR(aaa_p,iii_p) \ - do { \ - UInt iii = (UInt)iii_p; \ - UChar* aaa = (UChar*)aaa_p; \ - aaa[iii >> 3] &= ~(1 << (iii & 7)); \ - } while (0) - -#define BITARR_TEST(aaa_p,iii_p) \ - (0 != (((UChar*)aaa_p)[ ((UInt)iii_p) >> 3 ] \ - & (1 << (((UInt)iii_p) & 7)))) \ - - -#define VGM_BIT_VALID 0 -#define VGM_BIT_INVALID 1 - -#define VGM_NIBBLE_VALID 0 -#define VGM_NIBBLE_INVALID 0xF - -#define VGM_BYTE_VALID 0 -#define VGM_BYTE_INVALID 0xFF - -/* Now in vg_include.h. -#define VGM_WORD_VALID 0 -#define VGM_WORD_INVALID 0xFFFFFFFF -*/ - -#define VGM_EFLAGS_VALID 0xFFFFFFFE -#define VGM_EFLAGS_INVALID 0xFFFFFFFF - - -#define IS_ALIGNED4_ADDR(aaa_p) (0 == (((UInt)(aaa_p)) & 3)) - - -/*------------------------------------------------------------*/ -/*--- Basic bitmap management, reading and writing. ---*/ -/*------------------------------------------------------------*/ - -/* Allocate and initialise a secondary map. */ - -static SecMap* alloc_secondary_map ( __attribute__ ((unused)) - Char* caller ) -{ - SecMap* map; - UInt i; - PROF_EVENT(10); - - /* Mark all bytes as invalid access and invalid value. */ - - /* It just happens that a SecMap occupies exactly 18 pages -- - although this isn't important, so the following assert is - spurious. 
*/ - vg_assert(0 == (sizeof(SecMap) % VKI_BYTES_PER_PAGE)); - map = VG_(get_memory_from_mmap)( sizeof(SecMap), caller ); - - for (i = 0; i < 8192; i++) - map->abits[i] = VGM_BYTE_INVALID; /* Invalid address */ - for (i = 0; i < 65536; i++) - map->vbyte[i] = VGM_BYTE_INVALID; /* Invalid Value */ - - /* VG_(printf)("ALLOC_2MAP(%s)\n", caller ); */ - return map; -} - - -/* Basic reading/writing of the bitmaps, for byte-sized accesses. */ - -static __inline__ UChar get_abit ( Addr a ) -{ - SecMap* sm = VG_(primary_map)[a >> 16]; - UInt sm_off = a & 0xFFFF; - PROF_EVENT(20); - return BITARR_TEST(sm->abits, sm_off) - ? VGM_BIT_INVALID : VGM_BIT_VALID; -} - -static __inline__ UChar get_vbyte ( Addr a ) -{ - SecMap* sm = VG_(primary_map)[a >> 16]; - UInt sm_off = a & 0xFFFF; - PROF_EVENT(21); - return sm->vbyte[sm_off]; -} - -static __inline__ void set_abit ( Addr a, UChar abit ) -{ - SecMap* sm; - UInt sm_off; - PROF_EVENT(22); - ENSURE_MAPPABLE(a, "set_abit"); - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - if (abit) - BITARR_SET(sm->abits, sm_off); - else - BITARR_CLEAR(sm->abits, sm_off); -} - -static __inline__ void set_vbyte ( Addr a, UChar vbyte ) -{ - SecMap* sm; - UInt sm_off; - PROF_EVENT(23); - ENSURE_MAPPABLE(a, "set_vbyte"); - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - sm->vbyte[sm_off] = vbyte; -} - - -/* Reading/writing of the bitmaps, for aligned word-sized accesses. 
*/ - -static __inline__ UChar get_abits4_ALIGNED ( Addr a ) -{ - SecMap* sm; - UInt sm_off; - UChar abits8; - PROF_EVENT(24); -# ifdef VG_DEBUG_MEMORY - vg_assert(IS_ALIGNED4_ADDR(a)); -# endif - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - abits8 = sm->abits[sm_off >> 3]; - abits8 >>= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ - abits8 &= 0x0F; - return abits8; -} - -static UInt __inline__ get_vbytes4_ALIGNED ( Addr a ) -{ - SecMap* sm = VG_(primary_map)[a >> 16]; - UInt sm_off = a & 0xFFFF; - PROF_EVENT(25); -# ifdef VG_DEBUG_MEMORY - vg_assert(IS_ALIGNED4_ADDR(a)); -# endif - return ((UInt*)(sm->vbyte))[sm_off >> 2]; -} - - -/*------------------------------------------------------------*/ -/*--- Setting permissions over address ranges. ---*/ -/*------------------------------------------------------------*/ - -static void set_address_range_perms ( Addr a, UInt len, - UInt example_a_bit, - UInt example_v_bit ) -{ - UChar vbyte, abyte8; - UInt vword4, sm_off; - SecMap* sm; - - PROF_EVENT(30); - - if (len == 0) - return; - - if (len > 100 * 1000 * 1000) - VG_(message)(Vg_UserMsg, - "Warning: set address range perms: " - "large range %d, a %d, v %d", - len, example_a_bit, example_v_bit ); - - VGP_PUSHCC(VgpSARP); - - /* Requests to change permissions of huge address ranges may - indicate bugs in our machinery. 30,000,000 is arbitrary, but so - far all legitimate requests have fallen beneath that size. */ - /* 4 Mar 02: this is just stupid; get rid of it. */ - /* vg_assert(len < 30000000); */ - - /* Check the permissions make sense. */ - vg_assert(example_a_bit == VGM_BIT_VALID - || example_a_bit == VGM_BIT_INVALID); - vg_assert(example_v_bit == VGM_BIT_VALID - || example_v_bit == VGM_BIT_INVALID); - if (example_a_bit == VGM_BIT_INVALID) - vg_assert(example_v_bit == VGM_BIT_INVALID); - - /* The validity bits to write. */ - vbyte = example_v_bit==VGM_BIT_VALID - ? 
VGM_BYTE_VALID : VGM_BYTE_INVALID; - - /* In order that we can charge through the address space at 8 - bytes/main-loop iteration, make up some perms. */ - abyte8 = (example_a_bit << 7) - | (example_a_bit << 6) - | (example_a_bit << 5) - | (example_a_bit << 4) - | (example_a_bit << 3) - | (example_a_bit << 2) - | (example_a_bit << 1) - | (example_a_bit << 0); - vword4 = (vbyte << 24) | (vbyte << 16) | (vbyte << 8) | vbyte; - -# ifdef VG_DEBUG_MEMORY - /* Do it ... */ - while (True) { - PROF_EVENT(31); - if (len == 0) break; - set_abit ( a, example_a_bit ); - set_vbyte ( a, vbyte ); - a++; - len--; - } - -# else - /* Slowly do parts preceding 8-byte alignment. */ - while (True) { - PROF_EVENT(31); - if (len == 0) break; - if ((a % 8) == 0) break; - set_abit ( a, example_a_bit ); - set_vbyte ( a, vbyte ); - a++; - len--; - } - - if (len == 0) { - VGP_POPCC; - return; - } - vg_assert((a % 8) == 0 && len > 0); - - /* Once aligned, go fast. */ - while (True) { - PROF_EVENT(32); - if (len < 8) break; - ENSURE_MAPPABLE(a, "set_address_range_perms(fast)"); - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - sm->abits[sm_off >> 3] = abyte8; - ((UInt*)(sm->vbyte))[(sm_off >> 2) + 0] = vword4; - ((UInt*)(sm->vbyte))[(sm_off >> 2) + 1] = vword4; - a += 8; - len -= 8; - } - - if (len == 0) { - VGP_POPCC; - return; - } - vg_assert((a % 8) == 0 && len > 0 && len < 8); - - /* Finish the upper fragment. */ - while (True) { - PROF_EVENT(33); - if (len == 0) break; - set_abit ( a, example_a_bit ); - set_vbyte ( a, vbyte ); - a++; - len--; - } -# endif - - /* Check that zero page and highest page have not been written to - -- this could happen with buggy syscall wrappers. Today - (2001-04-26) had precisely such a problem with - __NR_setitimer. */ - vg_assert(VG_(first_and_last_secondaries_look_plausible)()); - VGP_POPCC; -} - - -/* Set permissions for address ranges ... 
*/ - -void VGM_(make_noaccess) ( Addr a, UInt len ) -{ - PROF_EVENT(35); - set_address_range_perms ( a, len, VGM_BIT_INVALID, VGM_BIT_INVALID ); -} - -void VGM_(make_writable) ( Addr a, UInt len ) -{ - PROF_EVENT(36); - set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_INVALID ); -} - -void VGM_(make_readable) ( Addr a, UInt len ) -{ - PROF_EVENT(37); - set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); -} - -void VGM_(make_readwritable) ( Addr a, UInt len ) -{ - PROF_EVENT(38); - set_address_range_perms ( a, len, VGM_BIT_VALID, VGM_BIT_VALID ); -} - -/* Block-copy permissions (needed for implementing realloc()). */ - -void VGM_(copy_address_range_perms) ( Addr src, Addr dst, UInt len ) -{ - UInt i; - PROF_EVENT(40); - for (i = 0; i < len; i++) { - UChar abit = get_abit ( src+i ); - UChar vbyte = get_vbyte ( src+i ); - PROF_EVENT(41); - set_abit ( dst+i, abit ); - set_vbyte ( dst+i, vbyte ); - } -} - - -/* Check permissions for address range. If inadequate permissions - exist, *bad_addr is set to the offending address, so the caller can - know what it is. */ - -Bool VGM_(check_writable) ( Addr a, UInt len, Addr* bad_addr ) -{ - UInt i; - UChar abit; - PROF_EVENT(42); - for (i = 0; i < len; i++) { - PROF_EVENT(43); - abit = get_abit(a); - if (abit == VGM_BIT_INVALID) { - if (bad_addr != NULL) *bad_addr = a; - return False; - } - a++; - } - return True; -} - -Bool VGM_(check_readable) ( Addr a, UInt len, Addr* bad_addr ) -{ - UInt i; - UChar abit; - UChar vbyte; - PROF_EVENT(44); - for (i = 0; i < len; i++) { - abit = get_abit(a); - vbyte = get_vbyte(a); - PROF_EVENT(45); - if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { - if (bad_addr != NULL) *bad_addr = a; - return False; - } - a++; - } - return True; -} - - -/* Check a zero-terminated ascii string. Tricky -- don't want to - examine the actual bytes, to find the end, until we're sure it is - safe to do so. 
*/ - -Bool VGM_(check_readable_asciiz) ( Addr a, Addr* bad_addr ) -{ - UChar abit; - UChar vbyte; - PROF_EVENT(46); - while (True) { - PROF_EVENT(47); - abit = get_abit(a); - vbyte = get_vbyte(a); - if (abit != VGM_BIT_VALID || vbyte != VGM_BYTE_VALID) { - if (bad_addr != NULL) *bad_addr = a; - return False; - } - /* Ok, a is safe to read. */ - if (* ((UChar*)a) == 0) return True; - a++; - } -} - - -/* Setting permissions for aligned words. This supports fast stack - operations. */ - -/* Make the 4-aligned word at a unaddressable and mark its V bits - invalid. */ -static __inline__ void make_aligned_word_NOACCESS ( Addr a ) -{ - SecMap* sm; - UInt sm_off; - UChar mask; - PROF_EVENT(50); -# ifdef VG_DEBUG_MEMORY - vg_assert(IS_ALIGNED4_ADDR(a)); -# endif - ENSURE_MAPPABLE(a, "make_aligned_word_NOACCESS"); - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; - mask = 0x0F; - mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ - /* mask now contains 1s where we wish to make address bits - invalid (1s). */ - sm->abits[sm_off >> 3] |= mask; -} - -/* Make the 4-aligned word at a addressable, but mark its V bits - invalid. */ -static __inline__ void make_aligned_word_WRITABLE ( Addr a ) -{ - SecMap* sm; - UInt sm_off; - UChar mask; - PROF_EVENT(51); -# ifdef VG_DEBUG_MEMORY - vg_assert(IS_ALIGNED4_ADDR(a)); -# endif - ENSURE_MAPPABLE(a, "make_aligned_word_WRITABLE"); - sm = VG_(primary_map)[a >> 16]; - sm_off = a & 0xFFFF; - ((UInt*)(sm->vbyte))[sm_off >> 2] = VGM_WORD_INVALID; - mask = 0x0F; - mask <<= (a & 4 /* 100b */); /* a & 4 is either 0 or 4 */ - /* mask now contains 1s where we wish to make address bits - valid (0s). */ - sm->abits[sm_off >> 3] &= ~mask; -} - - -/*------------------------------------------------------------*/ -/*--- Functions called directly from generated code. ---*/ -/*------------------------------------------------------------*/ - -static __inline__ UInt rotateRight16 ( UInt x ) -{ - /* Amazingly, gcc turns this into a single rotate insn. 
*/ - return (x >> 16) | (x << 16); -} - - -static __inline__ UInt shiftRight16 ( UInt x ) -{ - return x >> 16; -} - - -/* Read/write 1/2/4 sized V bytes, and emit an address error if - needed. */ - -/* VG_(helperc_{LD,ST}V{1,2,4}) handle the common case fast. - Under all other circumstances, it defers to the relevant _SLOWLY - function, which can handle all situations. -*/ -UInt VG_(helperc_LOADV4) ( Addr a ) -{ -# ifdef VG_DEBUG_MEMORY - return vgm_rd_V4_SLOWLY(a); -# else - UInt sec_no = rotateRight16(a) & 0x3FFFF; - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - UChar abits = sm->abits[a_off]; - abits >>= (a & 4); - abits &= 15; - PROF_EVENT(60); - if (abits == VGM_NIBBLE_VALID) { - /* Handle common case quickly: a is suitably aligned, is mapped, - and is addressible. */ - UInt v_off = a & 0xFFFF; - return ((UInt*)(sm->vbyte))[ v_off >> 2 ]; - } else { - /* Slow but general case. */ - return vgm_rd_V4_SLOWLY(a); - } -# endif -} - -void VG_(helperc_STOREV4) ( Addr a, UInt vbytes ) -{ -# ifdef VG_DEBUG_MEMORY - vgm_wr_V4_SLOWLY(a, vbytes); -# else - UInt sec_no = rotateRight16(a) & 0x3FFFF; - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - UChar abits = sm->abits[a_off]; - abits >>= (a & 4); - abits &= 15; - PROF_EVENT(61); - if (abits == VGM_NIBBLE_VALID) { - /* Handle common case quickly: a is suitably aligned, is mapped, - and is addressible. */ - UInt v_off = a & 0xFFFF; - ((UInt*)(sm->vbyte))[ v_off >> 2 ] = vbytes; - } else { - /* Slow but general case. */ - vgm_wr_V4_SLOWLY(a, vbytes); - } -# endif -} - -UInt VG_(helperc_LOADV2) ( Addr a ) -{ -# ifdef VG_DEBUG_MEMORY - return vgm_rd_V2_SLOWLY(a); -# else - UInt sec_no = rotateRight16(a) & 0x1FFFF; - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - PROF_EVENT(62); - if (sm->abits[a_off] == VGM_BYTE_VALID) { - /* Handle common case quickly. 
*/ - UInt v_off = a & 0xFFFF; - return 0xFFFF0000 - | - (UInt)( ((UShort*)(sm->vbyte))[ v_off >> 1 ] ); - } else { - /* Slow but general case. */ - return vgm_rd_V2_SLOWLY(a); - } -# endif -} - -void VG_(helperc_STOREV2) ( Addr a, UInt vbytes ) -{ -# ifdef VG_DEBUG_MEMORY - vgm_wr_V2_SLOWLY(a, vbytes); -# else - UInt sec_no = rotateRight16(a) & 0x1FFFF; - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - PROF_EVENT(63); - if (sm->abits[a_off] == VGM_BYTE_VALID) { - /* Handle common case quickly. */ - UInt v_off = a & 0xFFFF; - ((UShort*)(sm->vbyte))[ v_off >> 1 ] = vbytes & 0x0000FFFF; - } else { - /* Slow but general case. */ - vgm_wr_V2_SLOWLY(a, vbytes); - } -# endif -} - -UInt VG_(helperc_LOADV1) ( Addr a ) -{ -# ifdef VG_DEBUG_MEMORY - return vgm_rd_V1_SLOWLY(a); -# else - UInt sec_no = shiftRight16(a); - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - PROF_EVENT(64); - if (sm->abits[a_off] == VGM_BYTE_VALID) { - /* Handle common case quickly. */ - UInt v_off = a & 0xFFFF; - return 0xFFFFFF00 - | - (UInt)( ((UChar*)(sm->vbyte))[ v_off ] ); - } else { - /* Slow but general case. */ - return vgm_rd_V1_SLOWLY(a); - } -# endif -} - -void VG_(helperc_STOREV1) ( Addr a, UInt vbytes ) -{ -# ifdef VG_DEBUG_MEMORY - vgm_wr_V1_SLOWLY(a, vbytes); -# else - UInt sec_no = shiftRight16(a); - SecMap* sm = VG_(primary_map)[sec_no]; - UInt a_off = (a & 0xFFFF) >> 3; - PROF_EVENT(65); - if (sm->abits[a_off] == VGM_BYTE_VALID) { - /* Handle common case quickly. */ - UInt v_off = a & 0xFFFF; - ((UChar*)(sm->vbyte))[ v_off ] = vbytes & 0x000000FF; - } else { - /* Slow but general case. */ - vgm_wr_V1_SLOWLY(a, vbytes); - } -# endif -} - - -/*------------------------------------------------------------*/ -/*--- Fallback functions to handle cases that the above ---*/ -/*--- VG_(helperc_{LD,ST}V{1,2,4}) can't manage. 
---*/ -/*------------------------------------------------------------*/ - -static UInt vgm_rd_V4_SLOWLY ( Addr a ) -{ - Bool a0ok, a1ok, a2ok, a3ok; - UInt vb0, vb1, vb2, vb3; - - PROF_EVENT(70); - - /* First establish independently the addressibility of the 4 bytes - involved. */ - a0ok = get_abit(a+0) == VGM_BIT_VALID; - a1ok = get_abit(a+1) == VGM_BIT_VALID; - a2ok = get_abit(a+2) == VGM_BIT_VALID; - a3ok = get_abit(a+3) == VGM_BIT_VALID; - - /* Also get the validity bytes for the address. */ - vb0 = (UInt)get_vbyte(a+0); - vb1 = (UInt)get_vbyte(a+1); - vb2 = (UInt)get_vbyte(a+2); - vb3 = (UInt)get_vbyte(a+3); - - /* Now distinguish 3 cases */ - - /* Case 1: the address is completely valid, so: - - no addressing error - - return V bytes as read from memory - */ - if (a0ok && a1ok && a2ok && a3ok) { - UInt vw = VGM_WORD_INVALID; - vw <<= 8; vw |= vb3; - vw <<= 8; vw |= vb2; - vw <<= 8; vw |= vb1; - vw <<= 8; vw |= vb0; - return vw; - } - - /* Case 2: the address is completely invalid. - - emit addressing error - - return V word indicating validity. - This sounds strange, but if we make loads from invalid addresses - give invalid data, we also risk producing a number of confusing - undefined-value errors later, which confuses the fact that the - error arose in the first place from an invalid address. - */ - /* VG_(printf)("%p (%d %d %d %d)\n", a, a0ok, a1ok, a2ok, a3ok); */ - if (!VG_(clo_partial_loads_ok) - || ((a & 3) != 0) - || (!a0ok && !a1ok && !a2ok && !a3ok)) { - VG_(record_address_error)( a, 4, False ); - return (VGM_BYTE_VALID << 24) | (VGM_BYTE_VALID << 16) - | (VGM_BYTE_VALID << 8) | VGM_BYTE_VALID; - } - - /* Case 3: the address is partially valid. - - no addressing error - - returned V word is invalid where the address is invalid, - and contains V bytes from memory otherwise. - Case 3 is only allowed if VG_(clo_partial_loads_ok) is True - (which is the default), and the address is 4-aligned. - If not, Case 2 will have applied. 
- */ - vg_assert(VG_(clo_partial_loads_ok)); - { - UInt vw = VGM_WORD_INVALID; - vw <<= 8; vw |= (a3ok ? vb3 : VGM_BYTE_INVALID); - vw <<= 8; vw |= (a2ok ? vb2 : VGM_BYTE_INVALID); - vw <<= 8; vw |= (a1ok ? vb1 : VGM_BYTE_INVALID); - vw <<= 8; vw |= (a0ok ? vb0 : VGM_BYTE_INVALID); - return vw; - } -} - -static void vgm_wr_V4_SLOWLY ( Addr a, UInt vbytes ) -{ - /* Check the address for validity. */ - Bool aerr = False; - PROF_EVENT(71); - - if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; - if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; - if (get_abit(a+2) != VGM_BIT_VALID) aerr = True; - if (get_abit(a+3) != VGM_BIT_VALID) aerr = True; - - /* Store the V bytes, remembering to do it little-endian-ly. */ - set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; - set_vbyte( a+1, vbytes & 0x000000FF ); vbytes >>= 8; - set_vbyte( a+2, vbytes & 0x000000FF ); vbytes >>= 8; - set_vbyte( a+3, vbytes & 0x000000FF ); - - /* If an address error has happened, report it. */ - if (aerr) - VG_(record_address_error)( a, 4, True ); -} - -static UInt vgm_rd_V2_SLOWLY ( Addr a ) -{ - /* Check the address for validity. */ - UInt vw = VGM_WORD_INVALID; - Bool aerr = False; - PROF_EVENT(72); - - if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; - if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; - - /* Fetch the V bytes, remembering to do it little-endian-ly. */ - vw <<= 8; vw |= (UInt)get_vbyte(a+1); - vw <<= 8; vw |= (UInt)get_vbyte(a+0); - - /* If an address error has happened, report it. */ - if (aerr) { - VG_(record_address_error)( a, 2, False ); - vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) - | (VGM_BYTE_VALID << 8) | (VGM_BYTE_VALID); - } - return vw; -} - -static void vgm_wr_V2_SLOWLY ( Addr a, UInt vbytes ) -{ - /* Check the address for validity. */ - Bool aerr = False; - PROF_EVENT(73); - - if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; - if (get_abit(a+1) != VGM_BIT_VALID) aerr = True; - - /* Store the V bytes, remembering to do it little-endian-ly. 
*/ - set_vbyte( a+0, vbytes & 0x000000FF ); vbytes >>= 8; - set_vbyte( a+1, vbytes & 0x000000FF ); - - /* If an address error has happened, report it. */ - if (aerr) - VG_(record_address_error)( a, 2, True ); -} - -static UInt vgm_rd_V1_SLOWLY ( Addr a ) -{ - /* Check the address for validity. */ - UInt vw = VGM_WORD_INVALID; - Bool aerr = False; - PROF_EVENT(74); - - if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; - - /* Fetch the V byte. */ - vw <<= 8; vw |= (UInt)get_vbyte(a+0); - - /* If an address error has happened, report it. */ - if (aerr) { - VG_(record_address_error)( a, 1, False ); - vw = (VGM_BYTE_INVALID << 24) | (VGM_BYTE_INVALID << 16) - | (VGM_BYTE_INVALID << 8) | (VGM_BYTE_VALID); - } - return vw; -} - -static void vgm_wr_V1_SLOWLY ( Addr a, UInt vbytes ) -{ - /* Check the address for validity. */ - Bool aerr = False; - PROF_EVENT(75); - if (get_abit(a+0) != VGM_BIT_VALID) aerr = True; - - /* Store the V bytes, remembering to do it little-endian-ly. */ - set_vbyte( a+0, vbytes & 0x000000FF ); - - /* If an address error has happened, report it. */ - if (aerr) - VG_(record_address_error)( a, 1, True ); -} - - -/* --------------------------------------------------------------------- - Called from generated code, or from the assembly helpers. - Handlers for value check failures. - ------------------------------------------------------------------ */ - -void VG_(helperc_value_check0_fail) ( void ) -{ - VG_(record_value_error) ( 0 ); -} - -void VG_(helperc_value_check1_fail) ( void ) -{ - VG_(record_value_error) ( 1 ); -} - -void VG_(helperc_value_check2_fail) ( void ) -{ - VG_(record_value_error) ( 2 ); -} - -void VG_(helperc_value_check4_fail) ( void ) -{ - VG_(record_value_error) ( 4 ); -} - - -/* --------------------------------------------------------------------- - FPU load and store checks, called from generated code. 
- ------------------------------------------------------------------ */ - -void VGM_(fpu_read_check) ( Addr addr, Int size ) -{ - /* Ensure the read area is both addressible and valid (ie, - readable). If there's an address error, don't report a value - error too; but if there isn't an address error, check for a - value error. - - Try to be reasonably fast on the common case; wimp out and defer - to fpu_read_check_SLOWLY for everything else. */ - - SecMap* sm; - UInt sm_off, v_off, a_off; - Addr addr4; - - PROF_EVENT(80); - -# ifdef VG_DEBUG_MEMORY - fpu_read_check_SLOWLY ( addr, size ); -# else - - if (size == 4) { - if (!IS_ALIGNED4_ADDR(addr)) goto slow4; - PROF_EVENT(81); - /* Properly aligned. */ - sm = VG_(primary_map)[addr >> 16]; - sm_off = addr & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; - /* Properly aligned and addressible. */ - v_off = addr & 0xFFFF; - if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) - goto slow4; - /* Properly aligned, addressible and with valid data. */ - return; - slow4: - fpu_read_check_SLOWLY ( addr, 4 ); - return; - } - - if (size == 8) { - if (!IS_ALIGNED4_ADDR(addr)) goto slow8; - PROF_EVENT(82); - /* Properly aligned. Do it in two halves. */ - addr4 = addr + 4; - /* First half. */ - sm = VG_(primary_map)[addr >> 16]; - sm_off = addr & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; - /* First half properly aligned and addressible. */ - v_off = addr & 0xFFFF; - if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) - goto slow8; - /* Second half. */ - sm = VG_(primary_map)[addr4 >> 16]; - sm_off = addr4 & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; - /* Second half properly aligned and addressible. */ - v_off = addr4 & 0xFFFF; - if (((UInt*)(sm->vbyte))[ v_off >> 2 ] != VGM_WORD_VALID) - goto slow8; - /* Both halves properly aligned, addressible and with valid - data. 
*/ - return; - slow8: - fpu_read_check_SLOWLY ( addr, 8 ); - return; - } - - /* Can't be bothered to huff'n'puff to make these (allegedly) rare - cases go quickly. */ - if (size == 2) { - PROF_EVENT(83); - fpu_read_check_SLOWLY ( addr, 2 ); - return; - } - - if (size == 10) { - PROF_EVENT(84); - fpu_read_check_SLOWLY ( addr, 10 ); - return; - } - - if (size == 28) { - PROF_EVENT(84); /* XXX assign correct event number */ - fpu_read_check_SLOWLY ( addr, 28 ); - return; - } - - VG_(printf)("size is %d\n", size); - VG_(panic)("vgm_fpu_read_check: unhandled size"); -# endif -} - - -void VGM_(fpu_write_check) ( Addr addr, Int size ) -{ - /* Ensure the written area is addressible, and moan if otherwise. - If it is addressible, make it valid, otherwise invalid. - */ - - SecMap* sm; - UInt sm_off, v_off, a_off; - Addr addr4; - - PROF_EVENT(85); - -# ifdef VG_DEBUG_MEMORY - fpu_write_check_SLOWLY ( addr, size ); -# else - - if (size == 4) { - if (!IS_ALIGNED4_ADDR(addr)) goto slow4; - PROF_EVENT(86); - /* Properly aligned. */ - sm = VG_(primary_map)[addr >> 16]; - sm_off = addr & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow4; - /* Properly aligned and addressible. Make valid. */ - v_off = addr & 0xFFFF; - ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; - return; - slow4: - fpu_write_check_SLOWLY ( addr, 4 ); - return; - } - - if (size == 8) { - if (!IS_ALIGNED4_ADDR(addr)) goto slow8; - PROF_EVENT(87); - /* Properly aligned. Do it in two halves. */ - addr4 = addr + 4; - /* First half. */ - sm = VG_(primary_map)[addr >> 16]; - sm_off = addr & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; - /* First half properly aligned and addressible. Make valid. */ - v_off = addr & 0xFFFF; - ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; - /* Second half. 
*/ - sm = VG_(primary_map)[addr4 >> 16]; - sm_off = addr4 & 0xFFFF; - a_off = sm_off >> 3; - if (sm->abits[a_off] != VGM_BYTE_VALID) goto slow8; - /* Second half properly aligned and addressible. */ - v_off = addr4 & 0xFFFF; - ((UInt*)(sm->vbyte))[ v_off >> 2 ] = VGM_WORD_VALID; - /* Properly aligned, addressible and with valid data. */ - return; - slow8: - fpu_write_check_SLOWLY ( addr, 8 ); - return; - } - - /* Can't be bothered to huff'n'puff to make these (allegedly) rare - cases go quickly. */ - if (size == 2) { - PROF_EVENT(88); - fpu_write_check_SLOWLY ( addr, 2 ); - return; - } - - if (size == 10) { - PROF_EVENT(89); - fpu_write_check_SLOWLY ( addr, 10 ); - return; - } - - if (size == 28) { - PROF_EVENT(89); /* XXX assign correct event number */ - fpu_write_check_SLOWLY ( addr, 28 ); - return; - } - - VG_(printf)("size is %d\n", size); - VG_(panic)("vgm_fpu_write_check: unhandled size"); -# endif -} - - -/* --------------------------------------------------------------------- - Slow, general cases for FPU load and store checks. - ------------------------------------------------------------------ */ - -/* Generic version. Test for both addr and value errors, but if - there's an addr error, don't report a value error even if it - exists. */ - -void fpu_read_check_SLOWLY ( Addr addr, Int size ) -{ - Int i; - Bool aerr = False; - Bool verr = False; - PROF_EVENT(90); - for (i = 0; i < size; i++) { - PROF_EVENT(91); - if (get_abit(addr+i) != VGM_BIT_VALID) - aerr = True; - if (get_vbyte(addr+i) != VGM_BYTE_VALID) - verr = True; - } - - if (aerr) { - VG_(record_address_error)( addr, size, False ); - } else { - if (verr) - VG_(record_value_error)( size ); - } -} - - -/* Generic version. Test for addr errors. Valid addresses are - given valid values, and invalid addresses invalid values. 
*/ - -void fpu_write_check_SLOWLY ( Addr addr, Int size ) -{ - Int i; - Addr a_here; - Bool a_ok; - Bool aerr = False; - PROF_EVENT(92); - for (i = 0; i < size; i++) { - PROF_EVENT(93); - a_here = addr+i; - a_ok = get_abit(a_here) == VGM_BIT_VALID; - if (a_ok) { - set_vbyte(a_here, VGM_BYTE_VALID); - } else { - set_vbyte(a_here, VGM_BYTE_INVALID); - aerr = True; - } - } - if (aerr) { - VG_(record_address_error)( addr, size, True ); - } -} - - -/*------------------------------------------------------------*/ -/*--- Tracking permissions around %esp changes. ---*/ -/*------------------------------------------------------------*/ - -/* - The stack - ~~~~~~~~~ - The stack's segment seems to be dynamically extended downwards - by the kernel as the stack pointer moves down. Initially, a - 1-page (4k) stack is allocated. When %esp moves below that for - the first time, presumably a page fault occurs. The kernel - detects that the faulting address is in the range from %esp upwards - to the current valid stack. It then extends the stack segment - downwards for enough to cover the faulting address, and resumes - the process (invisibly). The process is unaware of any of this. - - That means that Valgrind can't spot when the stack segment is - being extended. Fortunately, we want to precisely and continuously - update stack permissions around %esp, so we need to spot all - writes to %esp anyway. - - The deal is: when %esp is assigned a lower value, the stack is - being extended. Create a secondary maps to fill in any holes - between the old stack ptr and this one, if necessary. Then - mark all bytes in the area just "uncovered" by this %esp change - as write-only. - - When %esp goes back up, mark the area receded over as unreadable - and unwritable. - - Just to record the %esp boundary conditions somewhere convenient: - %esp always points to the lowest live byte in the stack. All - addresses below %esp are not live; those at and above it are. 
-*/ - -/* Does this address look like something in or vaguely near the - current thread's stack? */ -static -Bool is_plausible_stack_addr ( ThreadState* tst, Addr aa ) -{ - UInt a = (UInt)aa; - PROF_EVENT(100); - if (a <= tst->stack_highest_word && - a > tst->stack_highest_word - VG_PLAUSIBLE_STACK_SIZE) - return True; - else - return False; -} - - -/* Is this address within some small distance below %ESP? Used only - for the --workaround-gcc296-bugs kludge. */ -Bool VG_(is_just_below_ESP)( Addr esp, Addr aa ) -{ - if ((UInt)esp > (UInt)aa - && ((UInt)esp - (UInt)aa) <= VG_GCC296_BUG_STACK_SLOP) - return True; - else - return False; -} - - -/* Kludgey ... how much does %esp have to change before we reckon that - the application is switching stacks ? */ -#define VG_HUGE_DELTA (VG_PLAUSIBLE_STACK_SIZE / 4) - -static Addr get_page_base ( Addr a ) -{ - return a & ~(VKI_BYTES_PER_PAGE-1); -} - - -static void vg_handle_esp_assignment_SLOWLY ( Addr ); - -void VGM_(handle_esp_assignment) ( Addr new_espA ) -{ - UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - UInt new_esp = (UInt)new_espA; - Int delta = ((Int)new_esp) - ((Int)old_esp); - - PROF_EVENT(101); - -# ifndef VG_DEBUG_MEMORY - - if (IS_ALIGNED4_ADDR(old_esp)) { - - /* Deal with the most common cases fast. These are ordered in - the sequence most common first. */ - - if (delta == -4) { - /* Moving down by 4 and properly aligned.. */ - PROF_EVENT(102); - make_aligned_word_WRITABLE(new_esp); - return; - } - - if (delta == 4) { - /* Moving up by 4 and properly aligned. 
*/ - PROF_EVENT(103); - make_aligned_word_NOACCESS(old_esp); - return; - } - - if (delta == -12) { - PROF_EVENT(104); - make_aligned_word_WRITABLE(new_esp); - make_aligned_word_WRITABLE(new_esp+4); - make_aligned_word_WRITABLE(new_esp+8); - return; - } - - if (delta == -8) { - PROF_EVENT(105); - make_aligned_word_WRITABLE(new_esp); - make_aligned_word_WRITABLE(new_esp+4); - return; - } - - if (delta == 16) { - PROF_EVENT(106); - make_aligned_word_NOACCESS(old_esp); - make_aligned_word_NOACCESS(old_esp+4); - make_aligned_word_NOACCESS(old_esp+8); - make_aligned_word_NOACCESS(old_esp+12); - return; - } - - if (delta == 12) { - PROF_EVENT(107); - make_aligned_word_NOACCESS(old_esp); - make_aligned_word_NOACCESS(old_esp+4); - make_aligned_word_NOACCESS(old_esp+8); - return; - } - - if (delta == 0) { - PROF_EVENT(108); - return; - } - - if (delta == 8) { - PROF_EVENT(109); - make_aligned_word_NOACCESS(old_esp); - make_aligned_word_NOACCESS(old_esp+4); - return; - } - - if (delta == -16) { - PROF_EVENT(110); - make_aligned_word_WRITABLE(new_esp); - make_aligned_word_WRITABLE(new_esp+4); - make_aligned_word_WRITABLE(new_esp+8); - make_aligned_word_WRITABLE(new_esp+12); - return; - } - - if (delta == 20) { - PROF_EVENT(111); - make_aligned_word_NOACCESS(old_esp); - make_aligned_word_NOACCESS(old_esp+4); - make_aligned_word_NOACCESS(old_esp+8); - make_aligned_word_NOACCESS(old_esp+12); - make_aligned_word_NOACCESS(old_esp+16); - return; - } - - if (delta == -20) { - PROF_EVENT(112); - make_aligned_word_WRITABLE(new_esp); - make_aligned_word_WRITABLE(new_esp+4); - make_aligned_word_WRITABLE(new_esp+8); - make_aligned_word_WRITABLE(new_esp+12); - make_aligned_word_WRITABLE(new_esp+16); - return; - } - - if (delta == 24) { - PROF_EVENT(113); - make_aligned_word_NOACCESS(old_esp); - make_aligned_word_NOACCESS(old_esp+4); - make_aligned_word_NOACCESS(old_esp+8); - make_aligned_word_NOACCESS(old_esp+12); - make_aligned_word_NOACCESS(old_esp+16); - 
make_aligned_word_NOACCESS(old_esp+20); - return; - } - - if (delta == -24) { - PROF_EVENT(114); - make_aligned_word_WRITABLE(new_esp); - make_aligned_word_WRITABLE(new_esp+4); - make_aligned_word_WRITABLE(new_esp+8); - make_aligned_word_WRITABLE(new_esp+12); - make_aligned_word_WRITABLE(new_esp+16); - make_aligned_word_WRITABLE(new_esp+20); - return; - } - - } - -# endif - - /* The above special cases handle 90% to 95% of all the stack - adjustments. The rest we give to the slow-but-general - mechanism. */ - vg_handle_esp_assignment_SLOWLY ( new_espA ); -} - - -static void vg_handle_esp_assignment_SLOWLY ( Addr new_espA ) -{ - UInt old_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - UInt new_esp = (UInt)new_espA; - Int delta = ((Int)new_esp) - ((Int)old_esp); - // VG_(printf)("%d ", delta); - PROF_EVENT(120); - if (-(VG_HUGE_DELTA) < delta && delta < VG_HUGE_DELTA) { - /* "Ordinary" stack change. */ - if (new_esp < old_esp) { - /* Moving down; the stack is growing. */ - PROF_EVENT(121); - VGM_(make_writable) ( new_esp, old_esp - new_esp ); - return; - } - if (new_esp > old_esp) { - /* Moving up; the stack is shrinking. */ - PROF_EVENT(122); - VGM_(make_noaccess) ( old_esp, new_esp - old_esp ); - return; - } - PROF_EVENT(123); - return; /* when old_esp == new_esp */ - } - - /* %esp has changed by more than HUGE_DELTA. We take this to mean - that the application is switching to a new stack, for whatever - reason, and we attempt to initialise the permissions around the - new stack in some plausible way. All pretty kludgey; needed to - make netscape-4.07 run without generating thousands of error - contexts. - - If we appear to be switching back to the main stack, don't mess - with the permissions in the area at and above the stack ptr. - Otherwise, we're switching to an alternative stack; make the - area above %esp readable -- this doesn't seem right -- the right - thing to do would be to make it writable -- but is needed to - avoid huge numbers of errs in netscape. 
To be investigated. */ - - { Addr invalid_down_to = get_page_base(new_esp) - - 0 * VKI_BYTES_PER_PAGE; - Addr valid_up_to = get_page_base(new_esp) + VKI_BYTES_PER_PAGE - + 0 * VKI_BYTES_PER_PAGE; - ThreadState* tst = VG_(get_current_thread_state)(); - PROF_EVENT(124); - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg, "Warning: client switching stacks? " - "%%esp: %p --> %p", - old_esp, new_esp); - /* VG_(printf)("na %p, %%esp %p, wr %p\n", - invalid_down_to, new_esp, valid_up_to ); */ - VGM_(make_noaccess) ( invalid_down_to, new_esp - invalid_down_to ); - if (!is_plausible_stack_addr(tst, new_esp)) { - VGM_(make_readable) ( new_esp, valid_up_to - new_esp ); - } - } -} - - -/*--------------------------------------------------------------*/ -/*--- Initialise the memory audit system on program startup. ---*/ -/*--------------------------------------------------------------*/ - -/* Handle one entry derived from /proc/self/maps. */ - -static -void init_memory_audit_callback ( - Addr start, UInt size, - Char rr, Char ww, Char xx, - UInt foffset, UChar* filename ) -{ - UChar example_a_bit; - UChar example_v_bit; - UInt r_esp; - Bool is_stack_segment; - - /* Sanity check ... if this is the executable's text segment, - ensure it is loaded where we think it ought to be. Any file - name which doesn't contain ".so" is assumed to be the - executable. */ - if (filename != NULL - && xx == 'x' - && VG_(strstr(filename, ".so")) == NULL - ) { - /* We assume this is the executable. */ - if (start != VG_ASSUMED_EXE_BASE) { - VG_(message)(Vg_UserMsg, - "FATAL: executable base addr not as assumed."); - VG_(message)(Vg_UserMsg, "name %s, actual %p, assumed %p.", - filename, start, VG_ASSUMED_EXE_BASE); - VG_(message)(Vg_UserMsg, - "One reason this could happen is that you have a shared object"); - VG_(message)(Vg_UserMsg, - " whose name doesn't contain the characters \".so\", so Valgrind "); - VG_(message)(Vg_UserMsg, - "naively assumes it is the executable. 
"); - VG_(message)(Vg_UserMsg, - "In that case, rename it appropriately."); - VG_(panic)("VG_ASSUMED_EXE_BASE doesn't match reality"); - } - } - - if (0) - VG_(message)(Vg_DebugMsg, - "initial map %8x-%8x %c%c%c? %8x (%d) (%s)", - start,start+size,rr,ww,xx,foffset, - size, filename?filename:(UChar*)"NULL"); - - r_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - is_stack_segment = start <= r_esp && r_esp < start+size; - - /* Figure out the segment's permissions. - - All segments are addressible -- since a process can read its - own text segment. - - A read-but-not-write segment presumably contains initialised - data, so is all valid. Read-write segments presumably contains - uninitialised data, so is all invalid. */ - - /* ToDo: make this less bogus. */ - if (rr != 'r' && xx != 'x' && ww != 'w') { - /* Very bogus; this path never gets taken. */ - /* A no, V no */ - example_a_bit = VGM_BIT_INVALID; - example_v_bit = VGM_BIT_INVALID; - } else { - /* A yes, V yes */ - example_a_bit = VGM_BIT_VALID; - example_v_bit = VGM_BIT_VALID; - /* Causes a lot of errs for unknown reasons. - if (filename is valgrind.so - [careful about end conditions on filename]) { - example_a_bit = VGM_BIT_INVALID; - example_v_bit = VGM_BIT_INVALID; - } - */ - } - - set_address_range_perms ( start, size, - example_a_bit, example_v_bit ); - - if (is_stack_segment) { - /* This is the stack segment. Mark all below %esp as - noaccess. */ - if (0) - VG_(message)(Vg_DebugMsg, - "invalidating stack area: %x .. %x", - start,r_esp); - VGM_(make_noaccess)( start, r_esp-start ); - } -} - - -/* Initialise the memory audit system. */ -void VGM_(init_memory_audit) ( void ) -{ - Int i; - - init_prof_mem(); - - for (i = 0; i < 8192; i++) - vg_distinguished_secondary_map.abits[i] - = VGM_BYTE_INVALID; /* Invalid address */ - for (i = 0; i < 65536; i++) - vg_distinguished_secondary_map.vbyte[i] - = VGM_BYTE_INVALID; /* Invalid Value */ - - /* These entries gradually get overwritten as the used address - space expands. 
*/ - for (i = 0; i < 65536; i++) - VG_(primary_map)[i] = &vg_distinguished_secondary_map; - /* These ones should never change; it's a bug in Valgrind if they - do. */ - for (i = 65536; i < 262144; i++) - VG_(primary_map)[i] = &vg_distinguished_secondary_map; - - /* Read the initial memory mapping from the /proc filesystem, and - set up our own maps accordingly. */ - VG_(read_procselfmaps) ( init_memory_audit_callback ); - - /* Last but not least, set up the shadow regs with reasonable (sic) - values. All regs are claimed to have valid values. - */ - VG_(baseBlock)[VGOFF_(sh_esp)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_ebp)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_eax)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_ecx)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_edx)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_ebx)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_esi)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_edi)] = VGM_WORD_VALID; - VG_(baseBlock)[VGOFF_(sh_eflags)] = VGM_EFLAGS_VALID; - - /* Record the end of the data segment, so that vg_syscall_mem.c - can make sense of calls to brk(). - */ - VGM_(curr_dataseg_end) = (Addr)VG_(brk)(0); - if (VGM_(curr_dataseg_end) == (Addr)(-1)) - VG_(panic)("vgm_init_memory_audit: can't determine data-seg end"); - - if (0) - VG_(printf)("DS END is %p\n", (void*)VGM_(curr_dataseg_end)); - - /* Read the list of errors to suppress. This should be found in - the file specified by vg_clo_suppressions. */ - VG_(load_suppressions)(); -} - - -/*------------------------------------------------------------*/ -/*--- Low-level address-space scanning, for the leak ---*/ -/*--- detector. 
---*/ -/*------------------------------------------------------------*/ - -static -jmp_buf memscan_jmpbuf; - -static -void vg_scan_all_valid_memory_sighandler ( Int sigNo ) -{ - __builtin_longjmp(memscan_jmpbuf, 1); -} - -UInt VG_(scan_all_valid_memory) ( void (*notify_word)( Addr, UInt ) ) -{ - /* All volatile, because some gccs seem paranoid about longjmp(). */ - volatile UInt res, numPages, page, vbytes, primaryMapNo, nWordsNotified; - volatile Addr pageBase, addr; - volatile SecMap* sm; - volatile UChar abits; - volatile UInt page_first_word; - - vki_ksigaction sigbus_saved; - vki_ksigaction sigbus_new; - vki_ksigaction sigsegv_saved; - vki_ksigaction sigsegv_new; - vki_ksigset_t blockmask_saved; - vki_ksigset_t unblockmask_new; - - /* Temporarily install a new sigsegv and sigbus handler, and make - sure SIGBUS, SIGSEGV and SIGTERM are unblocked. (Perhaps the - first two can never be blocked anyway?) */ - - sigbus_new.ksa_handler = vg_scan_all_valid_memory_sighandler; - sigbus_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; - sigbus_new.ksa_restorer = NULL; - res = VG_(ksigemptyset)( &sigbus_new.ksa_mask ); - vg_assert(res == 0); - - sigsegv_new.ksa_handler = vg_scan_all_valid_memory_sighandler; - sigsegv_new.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; - sigsegv_new.ksa_restorer = NULL; - res = VG_(ksigemptyset)( &sigsegv_new.ksa_mask ); - vg_assert(res == 0+0); - - res = VG_(ksigemptyset)( &unblockmask_new ); - res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGBUS ); - res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGSEGV ); - res |= VG_(ksigaddset)( &unblockmask_new, VKI_SIGTERM ); - vg_assert(res == 0+0+0); - - res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_new, &sigbus_saved ); - vg_assert(res == 0+0+0+0); - - res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_new, &sigsegv_saved ); - vg_assert(res == 0+0+0+0+0); - - res = VG_(ksigprocmask)( VKI_SIG_UNBLOCK, &unblockmask_new, &blockmask_saved ); - vg_assert(res == 0+0+0+0+0+0); - - /* The signal handlers are 
installed. Actually do the memory scan. */ - numPages = 1 << (32-VKI_BYTES_PER_PAGE_BITS); - vg_assert(numPages == 1048576); - vg_assert(4096 == (1 << VKI_BYTES_PER_PAGE_BITS)); - - nWordsNotified = 0; - - for (page = 0; page < numPages; page++) { - pageBase = page << VKI_BYTES_PER_PAGE_BITS; - primaryMapNo = pageBase >> 16; - sm = VG_(primary_map)[primaryMapNo]; - if (IS_DISTINGUISHED_SM(sm)) continue; - if (__builtin_setjmp(memscan_jmpbuf) == 0) { - /* try this ... */ - page_first_word = * (volatile UInt*)pageBase; - /* we get here if we didn't get a fault */ - /* Scan the page */ - for (addr = pageBase; addr < pageBase+VKI_BYTES_PER_PAGE; addr += 4) { - abits = get_abits4_ALIGNED(addr); - vbytes = get_vbytes4_ALIGNED(addr); - if (abits == VGM_NIBBLE_VALID - && vbytes == VGM_WORD_VALID) { - nWordsNotified++; - notify_word ( addr, *(UInt*)addr ); - } - } - } else { - /* We get here if reading the first word of the page caused a - fault, which in turn caused the signal handler to longjmp. - Ignore this page. */ - if (0) - VG_(printf)( - "vg_scan_all_valid_memory_sighandler: ignoring page at %p\n", - (void*)pageBase - ); - } - } - - /* Restore signal state to whatever it was before. */ - res = VG_(ksigaction)( VKI_SIGBUS, &sigbus_saved, NULL ); - vg_assert(res == 0 +0); - - res = VG_(ksigaction)( VKI_SIGSEGV, &sigsegv_saved, NULL ); - vg_assert(res == 0 +0 +0); - - res = VG_(ksigprocmask)( VKI_SIG_SETMASK, &blockmask_saved, NULL ); - vg_assert(res == 0 +0 +0 +0); - - return nWordsNotified; -} - - -/*------------------------------------------------------------*/ -/*--- Detecting leaked (unreachable) malloc'd blocks. ---*/ -/*------------------------------------------------------------*/ - -/* A block is either - -- Proper-ly reached; a pointer to its start has been found - -- Interior-ly reached; only an interior pointer to it has been found - -- Unreached; so far, no pointers to any part of it have been found. 
-*/ -typedef - enum { Unreached, Interior, Proper } - Reachedness; - -/* A block record, used for generating err msgs. */ -typedef - struct _LossRecord { - struct _LossRecord* next; - /* Where these lost blocks were allocated. */ - ExeContext* allocated_at; - /* Their reachability. */ - Reachedness loss_mode; - /* Number of blocks and total # bytes involved. */ - UInt total_bytes; - UInt num_blocks; - } - LossRecord; - - -/* Find the i such that ptr points at or inside the block described by - shadows[i]. Return -1 if none found. This assumes that shadows[] - has been sorted on the ->data field. */ - -#ifdef VG_DEBUG_LEAKCHECK -/* Used to sanity-check the fast binary-search mechanism. */ -static Int find_shadow_for_OLD ( Addr ptr, - ShadowChunk** shadows, - Int n_shadows ) - -{ - Int i; - Addr a_lo, a_hi; - PROF_EVENT(70); - for (i = 0; i < n_shadows; i++) { - PROF_EVENT(71); - a_lo = shadows[i]->data; - a_hi = ((Addr)shadows[i]->data) + shadows[i]->size - 1; - if (a_lo <= ptr && ptr <= a_hi) - return i; - } - return -1; -} -#endif - - -static Int find_shadow_for ( Addr ptr, - ShadowChunk** shadows, - Int n_shadows ) -{ - Addr a_mid_lo, a_mid_hi; - Int lo, mid, hi, retVal; - PROF_EVENT(70); - /* VG_(printf)("find shadow for %p = ", ptr); */ - retVal = -1; - lo = 0; - hi = n_shadows-1; - while (True) { - PROF_EVENT(71); - - /* invariant: current unsearched space is from lo to hi, - inclusive. 
*/ - if (lo > hi) break; /* not found */ - - mid = (lo + hi) / 2; - a_mid_lo = shadows[mid]->data; - a_mid_hi = ((Addr)shadows[mid]->data) + shadows[mid]->size - 1; - - if (ptr < a_mid_lo) { - hi = mid-1; - continue; - } - if (ptr > a_mid_hi) { - lo = mid+1; - continue; - } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - retVal = mid; - break; - } - -# ifdef VG_DEBUG_LEAKCHECK - vg_assert(retVal == find_shadow_for_OLD ( ptr, shadows, n_shadows )); -# endif - /* VG_(printf)("%d\n", retVal); */ - return retVal; -} - - - -/* Shell-sort shadows[0 .. n_shadows-1] into ascending order of the - ->data field, stepping down through the gaps listed in incs[]. - Does nothing when there are fewer than two entries. */ -static void sort_malloc_shadows ( ShadowChunk** shadows, UInt n_shadows ) -{ - Int incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 }; - Int lo = 0; - Int hi = n_shadows-1; - Int i, j, h, bigN, hp; - ShadowChunk* v; - - PROF_EVENT(72); - bigN = hi - lo + 1; if (bigN < 2) return; - /* Find the first gap that is not below bigN, then step back one. - The hp < 14 bound keeps the scan inside incs[] when bigN exceeds - every entry; the largest gap is then the starting point. */ - hp = 0; while (hp < 14 && incs[hp] < bigN) hp++; hp--; - - for (; hp >= 0; hp--) { - PROF_EVENT(73); - h = incs[hp]; - i = lo + h; - while (1) { - PROF_EVENT(74); - if (i > hi) break; - v = shadows[i]; - j = i; - while (shadows[j-h]->data > v->data) { - PROF_EVENT(75); - shadows[j] = shadows[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - shadows[j] = v; - i++; - } - } -} - -/* Globals, for the callback used by VG_(detect_memory_leaks). */ - -static ShadowChunk** vglc_shadows; -static Int vglc_n_shadows; -static Reachedness* vglc_reachedness; -static Addr vglc_min_mallocd_addr; -static Addr vglc_max_mallocd_addr; - -static -void vg_detect_memory_leaks_notify_addr ( Addr a, UInt word_at_a ) -{ - Int sh_no; - Addr ptr; - - /* Rule out some known causes of bogus pointers. Mostly these do - not cause much trouble because only a few false pointers can - ever lurk in these places. This mainly stops it reporting that - blocks are still reachable in stupid test programs like this - - int main (void) { char* a = malloc(100); return 0; } - - which people seem inordinately fond of writing, for some reason. 
- - Note that this is a complete kludge. It would be better to - ignore any addresses corresponding to valgrind.so's .bss and - .data segments, but I cannot think of a reliable way to identify - where the .bss segment has been put. If you can, drop me a - line. - */ - if (a >= ((Addr)(&VG_(stack))) - && a <= ((Addr)(&VG_(stack))) + sizeof(VG_(stack))) { - return; - } - if (a >= ((Addr)(&VG_(m_state_static))) - && a <= ((Addr)(&VG_(m_state_static))) + sizeof(VG_(m_state_static))) { - return; - } - if (a == (Addr)(&vglc_min_mallocd_addr)) - return; - if (a == (Addr)(&vglc_max_mallocd_addr)) - return; - - /* OK, let's get on and do something Useful for a change. */ - - ptr = (Addr)word_at_a; - if (ptr >= vglc_min_mallocd_addr && ptr <= vglc_max_mallocd_addr) { - /* Might be legitimate; we'll have to investigate further. */ - sh_no = find_shadow_for ( ptr, vglc_shadows, vglc_n_shadows ); - if (sh_no != -1) { - /* Found a block at/into which ptr points. */ - vg_assert(sh_no >= 0 && sh_no < vglc_n_shadows); - vg_assert(ptr < vglc_shadows[sh_no]->data - + vglc_shadows[sh_no]->size); - /* Decide whether Proper-ly or Interior-ly reached. */ - if (ptr == vglc_shadows[sh_no]->data) { - if (0) VG_(printf)("pointer at %p to %p\n", a, word_at_a ); - vglc_reachedness[sh_no] = Proper; - } else { - if (vglc_reachedness[sh_no] == Unreached) - vglc_reachedness[sh_no] = Interior; - } - } - } -} - - -void VG_(detect_memory_leaks) ( void ) -{ - Int i; - Int blocks_leaked, bytes_leaked; - Int blocks_dubious, bytes_dubious; - Int blocks_reachable, bytes_reachable; - Int n_lossrecords; - UInt bytes_notified; - - LossRecord* errlist; - LossRecord* p; - - Bool (*ec_comparer_fn) ( ExeContext*, ExeContext* ); - PROF_EVENT(76); - vg_assert(VG_(clo_instrument)); - - /* Decide how closely we want to match ExeContexts in leak - records. 
*/ - switch (VG_(clo_leak_resolution)) { - case 2: - ec_comparer_fn = VG_(eq_ExeContext_top2); - break; - case 4: - ec_comparer_fn = VG_(eq_ExeContext_top4); - break; - case VG_DEEPEST_BACKTRACE: - ec_comparer_fn = VG_(eq_ExeContext_all); - break; - default: - VG_(panic)("VG_(detect_memory_leaks): " - "bad VG_(clo_leak_resolution)"); - break; - } - - /* vg_get_malloc_shadows allocates storage for shadows */ - vglc_shadows = VG_(get_malloc_shadows)( &vglc_n_shadows ); - if (vglc_n_shadows == 0) { - vg_assert(vglc_shadows == NULL); - VG_(message)(Vg_UserMsg, - "No malloc'd blocks -- no leaks are possible.\n"); - return; - } - - VG_(message)(Vg_UserMsg, - "searching for pointers to %d not-freed blocks.", - vglc_n_shadows ); - sort_malloc_shadows ( vglc_shadows, vglc_n_shadows ); - - /* Sanity check; assert that the blocks are now in order and that - they don't overlap. */ - for (i = 0; i < vglc_n_shadows-1; i++) { - vg_assert( ((Addr)vglc_shadows[i]->data) - < ((Addr)vglc_shadows[i+1]->data) ); - vg_assert( ((Addr)vglc_shadows[i]->data) + vglc_shadows[i]->size - < ((Addr)vglc_shadows[i+1]->data) ); - } - - vglc_min_mallocd_addr = ((Addr)vglc_shadows[0]->data); - vglc_max_mallocd_addr = ((Addr)vglc_shadows[vglc_n_shadows-1]->data) - + vglc_shadows[vglc_n_shadows-1]->size - 1; - - vglc_reachedness - = VG_(malloc)( VG_AR_PRIVATE, vglc_n_shadows * sizeof(Reachedness) ); - for (i = 0; i < vglc_n_shadows; i++) - vglc_reachedness[i] = Unreached; - - /* Do the scan of memory. 
*/ - bytes_notified - = VG_(scan_all_valid_memory)( &vg_detect_memory_leaks_notify_addr ) - * VKI_BYTES_PER_WORD; - - VG_(message)(Vg_UserMsg, "checked %d bytes.", bytes_notified); - - blocks_leaked = bytes_leaked = 0; - blocks_dubious = bytes_dubious = 0; - blocks_reachable = bytes_reachable = 0; - - for (i = 0; i < vglc_n_shadows; i++) { - if (vglc_reachedness[i] == Unreached) { - blocks_leaked++; - bytes_leaked += vglc_shadows[i]->size; - } - else if (vglc_reachedness[i] == Interior) { - blocks_dubious++; - bytes_dubious += vglc_shadows[i]->size; - } - else if (vglc_reachedness[i] == Proper) { - blocks_reachable++; - bytes_reachable += vglc_shadows[i]->size; - } - } - - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, "definitely lost: %d bytes in %d blocks.", - bytes_leaked, blocks_leaked ); - VG_(message)(Vg_UserMsg, "possibly lost: %d bytes in %d blocks.", - bytes_dubious, blocks_dubious ); - VG_(message)(Vg_UserMsg, "still reachable: %d bytes in %d blocks.", - bytes_reachable, blocks_reachable ); - - - /* Common up the lost blocks so we can print sensible error - messages. 
*/ - - n_lossrecords = 0; - errlist = NULL; - for (i = 0; i < vglc_n_shadows; i++) { - for (p = errlist; p != NULL; p = p->next) { - if (p->loss_mode == vglc_reachedness[i] - && ec_comparer_fn ( - p->allocated_at, - vglc_shadows[i]->where) ) { - break; - } - } - if (p != NULL) { - p->num_blocks ++; - p->total_bytes += vglc_shadows[i]->size; - } else { - n_lossrecords ++; - p = VG_(malloc)(VG_AR_PRIVATE, sizeof(LossRecord)); - p->loss_mode = vglc_reachedness[i]; - p->allocated_at = vglc_shadows[i]->where; - p->total_bytes = vglc_shadows[i]->size; - p->num_blocks = 1; - p->next = errlist; - errlist = p; - } - } - - /* Print the records smallest-first. A record is marked as done by - zeroing its num_blocks, so each pass picks the smallest one not - yet handled. */ - for (i = 0; i < n_lossrecords; i++) { - LossRecord* p_min = NULL; - UInt n_min = 0xFFFFFFFF; - for (p = errlist; p != NULL; p = p->next) { - if (p->num_blocks > 0 && p->total_bytes < n_min) { - n_min = p->total_bytes; - p_min = p; - } - } - vg_assert(p_min != NULL); - - if ( (!VG_(clo_show_reachable)) && p_min->loss_mode == Proper) { - p_min->num_blocks = 0; - continue; - } - - VG_(message)(Vg_UserMsg, ""); - VG_(message)( - Vg_UserMsg, - "%d bytes in %d blocks are %s in loss record %d of %d", - p_min->total_bytes, p_min->num_blocks, - p_min->loss_mode==Unreached ? "definitely lost" : - (p_min->loss_mode==Interior ? 
"possibly lost" - : "still reachable"), - i+1, n_lossrecords - ); - VG_(pp_ExeContext)(p_min->allocated_at); - p_min->num_blocks = 0; - } - - VG_(message)(Vg_UserMsg, ""); - VG_(message)(Vg_UserMsg, "LEAK SUMMARY:"); - VG_(message)(Vg_UserMsg, " definitely lost: %d bytes in %d blocks.", - bytes_leaked, blocks_leaked ); - VG_(message)(Vg_UserMsg, " possibly lost: %d bytes in %d blocks.", - bytes_dubious, blocks_dubious ); - VG_(message)(Vg_UserMsg, " still reachable: %d bytes in %d blocks.", - bytes_reachable, blocks_reachable ); - if (!VG_(clo_show_reachable)) { - VG_(message)(Vg_UserMsg, - "Reachable blocks (those to which a pointer was found) are not shown."); - VG_(message)(Vg_UserMsg, - "To see them, rerun with: --show-reachable=yes"); - } - VG_(message)(Vg_UserMsg, ""); - - /* Release the loss records built above; they were allocated from - VG_AR_PRIVATE and nothing refers to them once the report has - been printed. */ - while (errlist != NULL) { - p = errlist->next; - VG_(free) ( VG_AR_PRIVATE, errlist ); - errlist = p; - } - - VG_(free) ( VG_AR_PRIVATE, vglc_shadows ); - VG_(free) ( VG_AR_PRIVATE, vglc_reachedness ); -} - - -/* --------------------------------------------------------------------- - Sanity check machinery (permanently engaged). - ------------------------------------------------------------------ */ - -/* Check that nobody has spuriously claimed that the first or last 16 - pages (64 KB) of address space have become accessible. Failure of - the following does not per se indicate an internal consistency - problem, but it is so likely to that we really want to know - about it if so. */ - -Bool VG_(first_and_last_secondaries_look_plausible) ( void ) -{ - if (IS_DISTINGUISHED_SM(VG_(primary_map)[0]) - && IS_DISTINGUISHED_SM(VG_(primary_map)[65535])) { - return True; - } else { - return False; - } -} - - -/* A fast sanity check -- suitable for calling circa once per - millisecond. */ - -void VG_(do_sanity_checks) ( Bool force_expensive ) -{ - Int i; - Bool do_expensive_checks; - - if (VG_(sanity_level) < 1) return; - - /* --- First do all the tests that we can do quickly. ---*/ - - VG_(sanity_fast_count)++; - - /* Check that we haven't overrun our private stack. 
*/ - for (i = 0; i < 10; i++) { - vg_assert(VG_(stack)[i] - == ((UInt)(&VG_(stack)[i]) ^ 0xA4B3C2D1)); - vg_assert(VG_(stack)[10000-1-i] - == ((UInt)(&VG_(stack)[10000-i-1]) ^ 0xABCD4321)); - } - - /* Check stuff pertaining to the memory check system. */ - - if (VG_(clo_instrument)) { - - /* Check that nobody has spuriously claimed that the first or - last 16 pages of memory have become accessible [...] */ - vg_assert(VG_(first_and_last_secondaries_look_plausible)()); - } - - /* --- Now some more expensive checks. ---*/ - - /* Once every 25 times, check some more expensive stuff. */ - - do_expensive_checks = False; - if (force_expensive) - do_expensive_checks = True; - if (VG_(sanity_level) > 1) - do_expensive_checks = True; - if (VG_(sanity_level) == 1 - && (VG_(sanity_fast_count) % 25) == 0) - do_expensive_checks = True; - - if (do_expensive_checks) { - VG_(sanity_slow_count)++; - -# if 0 - { void zzzmemscan(void); zzzmemscan(); } -# endif - - if ((VG_(sanity_fast_count) % 250) == 0) - VG_(sanity_check_tc_tt)(); - - if (VG_(clo_instrument)) { - /* Make sure nobody changed the distinguished secondary. */ - for (i = 0; i < 8192; i++) - vg_assert(vg_distinguished_secondary_map.abits[i] - == VGM_BYTE_INVALID); - for (i = 0; i < 65536; i++) - vg_assert(vg_distinguished_secondary_map.vbyte[i] - == VGM_BYTE_INVALID); - - /* Make sure that the upper 3/4 of the primary map hasn't - been messed with. */ - for (i = 65536; i < 262144; i++) - vg_assert(VG_(primary_map)[i] - == & vg_distinguished_secondary_map); - } - /* - if ((VG_(sanity_fast_count) % 500) == 0) VG_(mallocSanityCheckAll)(); - */ - } - - if (VG_(sanity_level) > 1) { - /* Check sanity of the low-level memory manager. Note that bugs - in the client's code can cause this to fail, so we don't do - this check unless specially asked for. And because it's - potentially very expensive. 
*/ - VG_(mallocSanityCheckAll)(); - } -} - - -/* --------------------------------------------------------------------- - Debugging machinery (turn on to debug). Something of a mess. - ------------------------------------------------------------------ */ - -/* Render the 32 bits of x into str as '1'/'0' characters, most - significant bit first, with a space between each group of eight - and a terminating NUL (36 bytes in all). */ - -static void uint_to_bits ( UInt x, Char* str ) -{ - Int i; - Int w = 0; - /* str must point to a space of at least 36 bytes. */ - for (i = 31; i >= 0; i--) { - str[w++] = (x & ( ((UInt)1) << i)) ? '1' : '0'; - if (i == 24 || i == 16 || i == 8) - str[w++] = ' '; - } - str[w++] = 0; - vg_assert(w == 36); -} - -/* Print the value tags on the 8 integer registers & flag reg. - Caution! Not vthread-safe; looks in VG_(baseBlock), not the thread - state table. */ - -void VG_(show_reg_tags) ( void ) -{ - Char buf1[36]; - Char buf2[36]; - UInt z_eax, z_ebx, z_ecx, z_edx, - z_esi, z_edi, z_ebp, z_esp, z_eflags; - - z_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; - z_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; - z_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; - z_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; - z_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; - z_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; - z_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; - z_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; - z_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; - - /* The eflags line needs a %s conversion to consume buf1, just like - the register lines below. */ - uint_to_bits(z_eflags, buf1); - VG_(message)(Vg_DebugMsg, "efl %s\n", buf1); - - uint_to_bits(z_eax, buf1); - uint_to_bits(z_ebx, buf2); - VG_(message)(Vg_DebugMsg, "eax %s ebx %s\n", buf1, buf2); - - uint_to_bits(z_ecx, buf1); - uint_to_bits(z_edx, buf2); - VG_(message)(Vg_DebugMsg, "ecx %s edx %s\n", buf1, buf2); - - uint_to_bits(z_esi, buf1); - uint_to_bits(z_edi, buf2); - VG_(message)(Vg_DebugMsg, "esi %s edi %s\n", buf1, buf2); - - uint_to_bits(z_ebp, buf1); - uint_to_bits(z_esp, buf2); - VG_(message)(Vg_DebugMsg, "ebp %s esp %s\n", buf1, buf2); -} - - -#if 0 -/* For debugging only. Scan the address space and touch all allegedly - addressable words. 
Useful for establishing where Valgrind's idea of - addressibility has diverged from what the kernel believes. */ - -static -void zzzmemscan_notify_word ( Addr a, UInt w ) -{ -} - -void zzzmemscan ( void ) -{ - Int n_notifies - = VG_(scan_all_valid_memory)( zzzmemscan_notify_word ); - VG_(printf)("zzzmemscan: n_bytes = %d\n", 4 * n_notifies ); -} -#endif - - - - -#if 0 -static Int zzz = 0; - -void show_bb ( Addr eip_next ) -{ - VG_(printf)("[%4d] ", zzz); - VG_(show_reg_tags)( &VG_(m_shadow ); - VG_(translate) ( eip_next, NULL, NULL, NULL ); -} -#endif /* 0 */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_memory.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_messages.c b/coregrind/vg_messages.c deleted file mode 100644 index 3eaf8cd53e..0000000000 --- a/coregrind/vg_messages.c +++ /dev/null @@ -1,104 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- For sending error/informative messages. ---*/ -/*--- vg_message.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - - -#include "vg_include.h" - - -static char vg_mbuf[M_VG_MSGBUF]; -static int vg_n_mbuf; - -static void add_to_buf ( Char c ) -{ /* Append c to vg_mbuf and keep the buffer NUL-terminated; c is - silently dropped once the buffer is full. */ - if (vg_n_mbuf >= (M_VG_MSGBUF-1)) return; - vg_mbuf[vg_n_mbuf++] = c; - vg_mbuf[vg_n_mbuf] = 0; -} - - -/* Publicly visible from here onwards. */ - -void -VG_(add_to_msg) ( Char *format, ... ) -{ /* Format the arguments and append the result to the message - currently held in vg_mbuf. */ - va_list vargs; - va_start(vargs,format); - VG_(vprintf) ( add_to_buf, format, vargs ); - va_end(vargs); -} - -/* Send a simple single-part message: start it, append the formatted - text, and end it. */ -void VG_(message) ( VgMsgKind kind, Char* format, ... ) -{ - va_list vargs; - va_start(vargs,format); - VG_(start_msg) ( kind ); - VG_(vprintf) ( add_to_buf, format, vargs ); - va_end(vargs); - VG_(end_msg)(); -} - -void VG_(start_msg) ( VgMsgKind kind ) -{ /* Reset the buffer and write the message prefix: two marker - characters, the pid, two more marker characters and a space. - The marker character depends on the message kind. */ - Char c; - vg_n_mbuf = 0; - vg_mbuf[vg_n_mbuf] = 0; - switch (kind) { - case Vg_UserMsg: c = '='; break; - case Vg_DebugMsg: c = '-'; break; - case Vg_DebugExtraMsg: c = '+'; break; - default: c = '?'; break; - } - VG_(add_to_msg)( "%c%c%d%c%c ", - c,c, VG_(getpid)(), c,c ); -} - - -void VG_(end_msg) ( void ) -{ /* Append a newline and write the buffered message to the log fd. - Nothing is written when the log fd is negative. */ - if (VG_(clo_logfile_fd) >= 0) { - add_to_buf('\n'); - VG_(write)(VG_(clo_logfile_fd), vg_mbuf, VG_(strlen)(vg_mbuf)); - } -} - - -void VG_(startup_logging) ( void ) -{ /* No-op. */ -} - -void VG_(shutdown_logging) ( void ) -{ /* No-op. */ -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_messages.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_mylibc.c b/coregrind/vg_mylibc.c deleted file mode 100644 index e32aee8d10..0000000000 --- a/coregrind/vg_mylibc.c +++ /dev/null @@ -1,1277 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- 
Reimplementation of some C library stuff, to avoid depending ---*/ -/*--- on libc.so. ---*/ -/*--- vg_mylibc.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - - - -/* --------------------------------------------------------------------- - Really Actually DO system calls. - ------------------------------------------------------------------ */ - -/* Ripped off from /usr/include/asm/unistd.h. 
*/ - -static -UInt vg_do_syscall0 ( UInt syscallno ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno) ); - return __res; -} - - -static -UInt vg_do_syscall1 ( UInt syscallno, UInt arg1 ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno), - "b" (arg1) ); - return __res; -} - - -static -UInt vg_do_syscall2 ( UInt syscallno, - UInt arg1, UInt arg2 ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno), - "b" (arg1), - "c" (arg2) ); - return __res; -} - - -static -UInt vg_do_syscall3 ( UInt syscallno, - UInt arg1, UInt arg2, UInt arg3 ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno), - "b" (arg1), - "c" (arg2), - "d" (arg3) ); - return __res; -} - - -static -UInt vg_do_syscall4 ( UInt syscallno, - UInt arg1, UInt arg2, UInt arg3, UInt arg4 ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno), - "b" (arg1), - "c" (arg2), - "d" (arg3), - "S" (arg4) ); - return __res; -} - - -#if 0 -static -UInt vg_do_syscall5 ( UInt syscallno, - UInt arg1, UInt arg2, UInt arg3, UInt arg4, - UInt arg5 ) -{ - UInt __res; - __asm__ volatile ("int $0x80" - : "=a" (__res) - : "0" (syscallno), - "b" (arg1), - "c" (arg2), - "d" (arg3), - "S" (arg4), - "D" (arg5) ); - return __res; -} -#endif - -/* --------------------------------------------------------------------- - Wrappers around system calls, and other stuff, to do with signals. - ------------------------------------------------------------------ */ - -/* sigemptyset, sigfullset, sigaddset and sigdelset return 0 on - success and -1 on error. 
-*/ -Int VG_(ksigfillset)( vki_ksigset_t* set ) -{ - Int i; - if (set == NULL) - return -1; - for (i = 0; i < VKI_KNSIG_WORDS; i++) - set->ws[i] = 0xFFFFFFFF; - return 0; -} - -Int VG_(ksigemptyset)( vki_ksigset_t* set ) -{ - Int i; - if (set == NULL) - return -1; - for (i = 0; i < VKI_KNSIG_WORDS; i++) - set->ws[i] = 0x0; - return 0; -} - -Bool VG_(kisemptysigset)( vki_ksigset_t* set ) -{ - Int i; - vg_assert(set != NULL); - for (i = 0; i < VKI_KNSIG_WORDS; i++) - if (set->ws[i] != 0x0) return False; - return True; -} - -Bool VG_(kisfullsigset)( vki_ksigset_t* set ) -{ - Int i; - vg_assert(set != NULL); - for (i = 0; i < VKI_KNSIG_WORDS; i++) - if (set->ws[i] != ~0x0) return False; - return True; -} - - -Int VG_(ksigaddset)( vki_ksigset_t* set, Int signum ) -{ - if (set == NULL) - return -1; - if (signum < 1 && signum > VKI_KNSIG) - return -1; - signum--; - set->ws[signum / VKI_KNSIG_BPW] |= (1 << (signum % VKI_KNSIG_BPW)); - return 0; -} - -Int VG_(ksigdelset)( vki_ksigset_t* set, Int signum ) -{ - if (set == NULL) - return -1; - if (signum < 1 && signum > VKI_KNSIG) - return -1; - signum--; - set->ws[signum / VKI_KNSIG_BPW] &= ~(1 << (signum % VKI_KNSIG_BPW)); - return 0; -} - -Int VG_(ksigismember) ( vki_ksigset_t* set, Int signum ) -{ - if (set == NULL) - return 0; - if (signum < 1 && signum > VKI_KNSIG) - return 0; - signum--; - if (1 & ((set->ws[signum / VKI_KNSIG_BPW]) >> (signum % VKI_KNSIG_BPW))) - return 1; - else - return 0; -} - - -/* Add all signals in src to dst. */ -void VG_(ksigaddset_from_set)( vki_ksigset_t* dst, vki_ksigset_t* src ) -{ - Int i; - vg_assert(dst != NULL && src != NULL); - for (i = 0; i < VKI_KNSIG_WORDS; i++) - dst->ws[i] |= src->ws[i]; -} - -/* Remove all signals in src from dst. 
*/ -void VG_(ksigdelset_from_set)( vki_ksigset_t* dst, vki_ksigset_t* src ) -{ - Int i; - vg_assert(dst != NULL && src != NULL); - for (i = 0; i < VKI_KNSIG_WORDS; i++) - dst->ws[i] &= ~(src->ws[i]); -} - - -/* The functions sigaction, sigprocmask, sigpending and sigsuspend - return 0 on success and -1 on error. -*/ -Int VG_(ksigprocmask)( Int how, - const vki_ksigset_t* set, - vki_ksigset_t* oldset) -{ - Int res - = vg_do_syscall4(__NR_rt_sigprocmask, - how, (UInt)set, (UInt)oldset, - VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); - return VG_(is_kerror)(res) ? -1 : 0; -} - - -Int VG_(ksigaction) ( Int signum, - const vki_ksigaction* act, - vki_ksigaction* oldact) -{ - Int res - = vg_do_syscall4(__NR_rt_sigaction, - signum, (UInt)act, (UInt)oldact, - VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); - /* VG_(printf)("res = %d\n",res); */ - return VG_(is_kerror)(res) ? -1 : 0; -} - - -Int VG_(ksigaltstack)( const vki_kstack_t* ss, vki_kstack_t* oss ) -{ - Int res - = vg_do_syscall2(__NR_sigaltstack, (UInt)ss, (UInt)oss); - return VG_(is_kerror)(res) ? -1 : 0; -} - - -Int VG_(ksignal)(Int signum, void (*sighandler)(Int)) -{ - Int res; - vki_ksigaction sa; - sa.ksa_handler = sighandler; - sa.ksa_flags = VKI_SA_ONSTACK | VKI_SA_RESTART; - sa.ksa_restorer = NULL; - res = VG_(ksigemptyset)( &sa.ksa_mask ); - vg_assert(res == 0); - res = vg_do_syscall4(__NR_rt_sigaction, - signum, (UInt)(&sa), (UInt)NULL, - VKI_KNSIG_WORDS * VKI_BYTES_PER_WORD); - return VG_(is_kerror)(res) ? -1 : 0; -} - - -Int VG_(kill)( Int pid, Int signo ) -{ - Int res = vg_do_syscall2(__NR_kill, pid, signo); - return VG_(is_kerror)(res) ? -1 : 0; -} - - -Int VG_(sigpending) ( vki_ksigset_t* set ) -{ - Int res = vg_do_syscall1(__NR_sigpending, (UInt)set); - return VG_(is_kerror)(res) ? -1 : 0; -} - - -/* --------------------------------------------------------------------- - mmap/munmap, exit, fcntl - ------------------------------------------------------------------ */ - -/* Returns -1 on failure. 
*/ -void* VG_(mmap)( void* start, UInt length, - UInt prot, UInt flags, UInt fd, UInt offset) -{ - Int res; - UInt args[6]; - args[0] = (UInt)start; - args[1] = length; - args[2] = prot; - args[3] = flags; - args[4] = fd; - args[5] = offset; - res = vg_do_syscall1(__NR_mmap, (UInt)(&(args[0])) ); - return VG_(is_kerror)(res) ? ((void*)(-1)) : (void*)res; -} - -/* Returns -1 on failure. */ -Int VG_(munmap)( void* start, Int length ) -{ - Int res = vg_do_syscall2(__NR_munmap, (UInt)start, (UInt)length ); - return VG_(is_kerror)(res) ? -1 : 0; -} - -void VG_(exit)( Int status ) -{ - (void)vg_do_syscall1(__NR_exit, (UInt)status ); - /* Why are we still alive here? */ - /*NOTREACHED*/ - vg_assert(2+2 == 5); -} - -/* Returns -1 on error. */ -Int VG_(fcntl) ( Int fd, Int cmd, Int arg ) -{ - Int res = vg_do_syscall3(__NR_fcntl, fd, cmd, arg); - return VG_(is_kerror)(res) ? -1 : res; -} - -/* Returns -1 on error. */ -Int VG_(select)( Int n, - vki_fd_set* readfds, - vki_fd_set* writefds, - vki_fd_set* exceptfds, - struct vki_timeval * timeout ) -{ - Int res; - UInt args[5]; - args[0] = n; - args[1] = (UInt)readfds; - args[2] = (UInt)writefds; - args[3] = (UInt)exceptfds; - args[4] = (UInt)timeout; - res = vg_do_syscall1(__NR_select, (UInt)(&(args[0])) ); - return VG_(is_kerror)(res) ? -1 : res; -} - -/* Returns -1 on error, 0 if ok, 1 if interrupted. */ -Int VG_(nanosleep)( const struct vki_timespec *req, - struct vki_timespec *rem ) -{ - Int res; - res = vg_do_syscall2(__NR_nanosleep, (UInt)req, (UInt)rem); - if (res == -VKI_EINVAL) return -1; - if (res == -VKI_EINTR) return 1; - return 0; -} - -void* VG_(brk) ( void* end_data_segment ) -{ - Int res; - res = vg_do_syscall1(__NR_brk, (UInt)end_data_segment); - return (void*)( VG_(is_kerror)(res) ? -1 : res ); -} - - -/* --------------------------------------------------------------------- - printf implementation. The key function, vg_vprintf(), emits chars - into a caller-supplied function. 
Distantly derived from: - - vprintf replacement for Checker. - Copyright 1993, 1994, 1995 Tristan Gingold - Written September 1993 Tristan Gingold - Tristan Gingold, 8 rue Parmentier, F-91120 PALAISEAU, FRANCE - - (Checker itself was GPL'd.) - ------------------------------------------------------------------ */ - - -/* Some flags. */ -#define VG_MSG_SIGNED 1 /* The value is signed. */ -#define VG_MSG_ZJUSTIFY 2 /* Must justify with '0'. */ -#define VG_MSG_LJUSTIFY 4 /* Must justify on the left. */ - - -/* Copy a string into the buffer. */ -static void -myvprintf_str ( void(*send)(Char), Int flags, Int width, Char* str, - Bool capitalise ) -{ -# define MAYBE_TOUPPER(ch) (capitalise ? VG_(toupper)(ch) : (ch)) - - Int i, extra; - Int len = VG_(strlen)(str); - - if (width == 0) { - for (i = 0; i < len; i++) - send(MAYBE_TOUPPER(str[i])); - return; - } - - if (len > width) { - for (i = 0; i < width; i++) - send(MAYBE_TOUPPER(str[i])); - return; - } - - extra = width - len; - if (flags & VG_MSG_LJUSTIFY) { - for (i = 0; i < extra; i++) - send(' '); - } - for (i = 0; i < len; i++) - send(MAYBE_TOUPPER(str[i])); - if (!(flags & VG_MSG_LJUSTIFY)) { - for (i = 0; i < extra; i++) - send(' '); - } - -# undef MAYBE_TOUPPER -} - -/* Write P into the buffer according to these args: - * If SIGN is true, p is a signed. - * BASE is the base. - * If WITH_ZERO is true, '0' must be added. - * WIDTH is the width of the field. 
- */ -static void -myvprintf_int64 ( void(*send)(Char), Int flags, Int base, Int width, ULong p) -{ - Char buf[40]; - Int ind = 0; - Int i; - Bool neg = False; - Char *digits = "0123456789ABCDEF"; - - if (base < 2 || base > 16) - return; - - if ((flags & VG_MSG_SIGNED) && (Long)p < 0) { - p = - (Long)p; - neg = True; - } - - if (p == 0) - buf[ind++] = '0'; - else { - while (p > 0) { - buf[ind++] = digits[p % base]; - p /= base; - } - } - - if (neg) - buf[ind++] = '-'; - - if (width > 0 && !(flags & VG_MSG_LJUSTIFY)) { - for(; ind < width; ind++) { - vg_assert(ind < 39); - buf[ind] = (flags & VG_MSG_ZJUSTIFY) ? '0': ' '; - } - } - - /* Reverse copy to buffer. */ - for (i = ind -1; i >= 0; i--) - send(buf[i]); - - if (width > 0 && (flags & VG_MSG_LJUSTIFY)) { - for(; ind < width; ind++) - send((flags & VG_MSG_ZJUSTIFY) ? '0': ' '); - } -} - - -/* A simple vprintf(). */ -void -VG_(vprintf) ( void(*send)(Char), const Char *format, va_list vargs ) -{ - int i; - int flags; - int width; - Bool is_long; - - /* We assume that vargs has already been initialised by the - caller, using va_start, and that the caller will similarly - clean up with va_end. - */ - - for (i = 0; format[i] != 0; i++) { - if (format[i] != '%') { - send(format[i]); - continue; - } - i++; - /* A '%' has been found. Ignore a trailing %. */ - if (format[i] == 0) - break; - if (format[i] == '%') { - /* `%%' is replaced by `%'. */ - send('%'); - continue; - } - flags = 0; - is_long = False; - width = 0; /* length of the field. */ - /* If '-' follows '%', justify on the left. */ - if (format[i] == '-') { - flags |= VG_MSG_LJUSTIFY; - i++; - } - /* If '0' follows '%', pads will be inserted. */ - if (format[i] == '0') { - flags |= VG_MSG_ZJUSTIFY; - i++; - } - /* Compute the field length. 
*/ - while (format[i] >= '0' && format[i] <= '9') { - width *= 10; - width += format[i++] - '0'; - } - while (format[i] == 'l') { - i++; - is_long = True; - } - - switch (format[i]) { - case 'd': /* %d */ - flags |= VG_MSG_SIGNED; - if (is_long) - myvprintf_int64(send, flags, 10, width, - (ULong)(va_arg (vargs, Long))); - else - myvprintf_int64(send, flags, 10, width, - (ULong)(va_arg (vargs, Int))); - break; - case 'u': /* %u */ - if (is_long) - myvprintf_int64(send, flags, 10, width, - (ULong)(va_arg (vargs, ULong))); - else - myvprintf_int64(send, flags, 10, width, - (ULong)(va_arg (vargs, UInt))); - break; - case 'p': /* %p */ - send('0'); - send('x'); - myvprintf_int64(send, flags, 16, width, - (ULong)((UInt)va_arg (vargs, void *))); - break; - case 'x': /* %x */ - if (is_long) - myvprintf_int64(send, flags, 16, width, - (ULong)(va_arg (vargs, ULong))); - else - myvprintf_int64(send, flags, 16, width, - (ULong)(va_arg (vargs, UInt))); - break; - case 'c': /* %c */ - send(va_arg (vargs, int)); - break; - case 's': case 'S': { /* %s */ - char *str = va_arg (vargs, char *); - if (str == (char*) 0) str = "(null)"; - myvprintf_str(send, flags, width, str, format[i]=='S'); - break; - } - default: - break; - } - } -} - - -/* A general replacement for printf(). Note that only low-level - debugging info should be sent via here. The official route is to - to use vg_message(). This interface is deprecated. -*/ -static char myprintf_buf[100]; -static int n_myprintf_buf; - -static void add_to_myprintf_buf ( Char c ) -{ - if (n_myprintf_buf >= 100-10 /*paranoia*/ ) { - if (VG_(clo_logfile_fd) >= 0) - VG_(write) - (VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); - n_myprintf_buf = 0; - myprintf_buf[n_myprintf_buf] = 0; - } - myprintf_buf[n_myprintf_buf++] = c; - myprintf_buf[n_myprintf_buf] = 0; -} - -void VG_(printf) ( const char *format, ... 
) -{ - va_list vargs; - va_start(vargs,format); - - n_myprintf_buf = 0; - myprintf_buf[n_myprintf_buf] = 0; - VG_(vprintf) ( add_to_myprintf_buf, format, vargs ); - - if (n_myprintf_buf > 0 && VG_(clo_logfile_fd) >= 0) - VG_(write) - ( VG_(clo_logfile_fd), myprintf_buf, VG_(strlen)(myprintf_buf)); - - va_end(vargs); -} - - -/* A general replacement for sprintf(). */ -static Char* vg_sprintf_ptr; - -static void add_to_vg_sprintf_buf ( Char c ) -{ - *vg_sprintf_ptr++ = c; -} - -void VG_(sprintf) ( Char* buf, Char *format, ... ) -{ - va_list vargs; - va_start(vargs,format); - - vg_sprintf_ptr = buf; - VG_(vprintf) ( add_to_vg_sprintf_buf, format, vargs ); - add_to_vg_sprintf_buf(0); - - va_end(vargs); -} - - -/* --------------------------------------------------------------------- - Misc str* functions. - ------------------------------------------------------------------ */ - -Bool VG_(isspace) ( Char c ) -{ - return (c == ' ' || c == '\n' || c == '\t' || c == 0); -} - -Bool VG_(isdigit) ( Char c ) -{ - return (c >= '0' && c <= '9'); -} - -Int VG_(strlen) ( const Char* str ) -{ - Int i = 0; - while (str[i] != 0) i++; - return i; -} - - -Long VG_(atoll) ( Char* str ) -{ - Bool neg = False; - Long n = 0; - if (*str == '-') { str++; neg = True; }; - while (*str >= '0' && *str <= '9') { - n = 10*n + (Long)(*str - '0'); - str++; - } - if (neg) n = -n; - return n; -} - - -Long VG_(atoll36) ( Char* str ) -{ - Bool neg = False; - Long n = 0; - if (*str == '-') { str++; neg = True; }; - while (True) { - if (*str >= '0' && *str <= '9') { - n = 36*n + (Long)(*str - '0'); - } - else - if (*str >= 'A' && *str <= 'Z') { - n = 36*n + (Long)((*str - 'A') + 10); - } - else - if (*str >= 'a' && *str <= 'z') { - n = 36*n + (Long)((*str - 'a') + 10); - } - else { - break; - } - str++; - } - if (neg) n = -n; - return n; -} - - -Char* VG_(strcat) ( Char* dest, const Char* src ) -{ - Char* dest_orig = dest; - while (*dest) dest++; - while (*src) *dest++ = *src++; - *dest = 0; - return 
dest_orig; -} - - -Char* VG_(strncat) ( Char* dest, const Char* src, Int n ) -{ - Char* dest_orig = dest; - while (*dest) dest++; - while (*src && n > 0) { *dest++ = *src++; n--; } - *dest = 0; - return dest_orig; -} - - -Char* VG_(strpbrk) ( const Char* s, const Char* accept ) -{ - const Char* a; - while (*s) { - a = accept; - while (*a) - if (*a++ == *s) - return (Char *) s; - s++; - } - return NULL; -} - - -Char* VG_(strcpy) ( Char* dest, const Char* src ) -{ - Char* dest_orig = dest; - while (*src) *dest++ = *src++; - *dest = 0; - return dest_orig; -} - - -/* Copy bytes, not overrunning the end of dest and always ensuring - zero termination. */ -void VG_(strncpy_safely) ( Char* dest, const Char* src, Int ndest ) -{ - Int i; - vg_assert(ndest > 0); - i = 0; - dest[i] = 0; - while (True) { - if (src[i] == 0) return; - if (i >= ndest-1) return; - dest[i] = src[i]; - i++; - dest[i] = 0; - } -} - - -void VG_(strncpy) ( Char* dest, const Char* src, Int ndest ) -{ - VG_(strncpy_safely)( dest, src, ndest+1 ); -} - - -Int VG_(strcmp) ( const Char* s1, const Char* s2 ) -{ - while (True) { - if (*s1 == 0 && *s2 == 0) return 0; - if (*s1 == 0) return -1; - if (*s2 == 0) return 1; - - if (*(UChar*)s1 < *(UChar*)s2) return -1; - if (*(UChar*)s1 > *(UChar*)s2) return 1; - - s1++; s2++; - } -} - - -Int VG_(strcmp_ws) ( const Char* s1, const Char* s2 ) -{ - while (True) { - if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; - if (VG_(isspace)(*s1)) return -1; - if (VG_(isspace)(*s2)) return 1; - - if (*(UChar*)s1 < *(UChar*)s2) return -1; - if (*(UChar*)s1 > *(UChar*)s2) return 1; - - s1++; s2++; - } -} - - -Int VG_(strncmp) ( const Char* s1, const Char* s2, Int nmax ) -{ - Int n = 0; - while (True) { - if (n >= nmax) return 0; - if (*s1 == 0 && *s2 == 0) return 0; - if (*s1 == 0) return -1; - if (*s2 == 0) return 1; - - if (*(UChar*)s1 < *(UChar*)s2) return -1; - if (*(UChar*)s1 > *(UChar*)s2) return 1; - - s1++; s2++; n++; - } -} - - -Int VG_(strncmp_ws) ( const Char* s1, 
const Char* s2, Int nmax ) -{ - Int n = 0; - while (True) { - if (n >= nmax) return 0; - if (VG_(isspace)(*s1) && VG_(isspace)(*s2)) return 0; - if (VG_(isspace)(*s1)) return -1; - if (VG_(isspace)(*s2)) return 1; - - if (*(UChar*)s1 < *(UChar*)s2) return -1; - if (*(UChar*)s1 > *(UChar*)s2) return 1; - - s1++; s2++; n++; - } -} - - -Char* VG_(strstr) ( const Char* haystack, Char* needle ) -{ - Int n; - if (haystack == NULL) - return NULL; - n = VG_(strlen)(needle); - while (True) { - if (haystack[0] == 0) - return NULL; - if (VG_(strncmp)(haystack, needle, n) == 0) - return (Char*)haystack; - haystack++; - } -} - - -Char* VG_(strchr) ( const Char* s, Char c ) -{ - while (True) { - if (*s == c) return (Char*)s; - if (*s == 0) return NULL; - s++; - } -} - - -Char VG_(toupper) ( Char c ) -{ - if (c >= 'a' && c <= 'z') - return c + ('A' - 'a'); - else - return c; -} - - -Char* VG_(strdup) ( ArenaId aid, const Char* s ) -{ - Int i; - Int len = VG_(strlen)(s) + 1; - Char* res = VG_(malloc) (aid, len); - for (i = 0; i < len; i++) - res[i] = s[i]; - return res; -} - - -/* --------------------------------------------------------------------- - A simple string matching routine, purloined from Hugs98. - `*' matches any sequence of zero or more characters - `?' matches any single character exactly - `\c' matches the character c only (ignoring special chars) - c matches the character c only - ------------------------------------------------------------------ */ - -/* Keep track of recursion depth. */ -static Int recDepth; - -static Bool stringMatch_wrk ( Char* pat, Char* str ) -{ - vg_assert(recDepth >= 0 && recDepth < 250); - recDepth++; - for (;;) { - switch (*pat) { - case '\0' : return (*str=='\0'); - case '*' : do { - if (stringMatch_wrk(pat+1,str)) { - recDepth--; - return True; - } - } while (*str++); - recDepth--; - return False; - case '?' 
: if (*str++=='\0') { - recDepth--; - return False; - } - pat++; - break; - case '\\' : if (*++pat == '\0') { - recDepth--; - return False; /* spurious trailing \ in pattern */ - } - /* falls through to ... */ - default : if (*pat++ != *str++) { - recDepth--; - return False; - } - break; - } - } -} - -Bool VG_(stringMatch) ( Char* pat, Char* str ) -{ - Bool b; - recDepth = 0; - b = stringMatch_wrk ( pat, str ); - /* - VG_(printf)("%s %s %s\n", - b?"TRUE ":"FALSE", pat, str); - */ - return b; -} - - -/* --------------------------------------------------------------------- - Assertery. - ------------------------------------------------------------------ */ - -void VG_(assert_fail) ( Char* expr, Char* file, Int line, Char* fn ) -{ - static Bool entered = False; - if (entered) - VG_(exit)(2); - entered = True; - VG_(printf)("\n%s: %s:%d (%s): Assertion `%s' failed.\n", - "valgrind", file, line, fn, expr ); - VG_(pp_sched_status)(); - VG_(printf)("Please report this bug to me at: %s\n\n", VG_EMAIL_ADDR); - VG_(shutdown_logging)(); - VG_(exit)(1); -} - -void VG_(panic) ( Char* str ) -{ - VG_(printf)("\nvalgrind: the `impossible' happened:\n %s\n", str); - VG_(printf)("Basic block ctr is approximately %llu\n", VG_(bbs_done) ); - VG_(pp_sched_status)(); - VG_(printf)("Please report this bug to me at: %s\n\n", VG_EMAIL_ADDR); - VG_(shutdown_logging)(); - VG_(exit)(1); -} - - -/* --------------------------------------------------------------------- - Primitive support for reading files. - ------------------------------------------------------------------ */ - -/* Returns -1 on failure. */ -Int VG_(open_read) ( Char* pathname ) -{ - Int fd; - /* VG_(printf)("vg_open_read %s\n", pathname ); */ - - /* This gets a segmentation fault if pathname isn't a valid file. - I don't know why. It seems like the call to open is getting - intercepted and messed with by glibc ... */ - /* fd = open( pathname, O_RDONLY ); */ - /* ... 
so we go direct to the horse's mouth, which seems to work - ok: */ - const int O_RDONLY = 0; /* See /usr/include/bits/fcntl.h */ - fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_RDONLY, 0); - /* VG_(printf)("result = %d\n", fd); */ - if (VG_(is_kerror)(fd)) fd = -1; - return fd; -} - -/* Returns -1 on failure. */ -static Int VG_(chmod_u_rw) ( Int fd ) -{ - Int res; - const int O_IRUSR_IWUSR = 000600; /* See /usr/include/cpio.h */ - res = vg_do_syscall2(__NR_fchmod, fd, O_IRUSR_IWUSR); - if (VG_(is_kerror)(res)) res = -1; - return res; -} - -/* Returns -1 on failure. */ -Int VG_(create_and_write) ( Char* pathname ) -{ - Int fd; - - const int O_CR_AND_WR_ONLY = 0101; /* See /usr/include/bits/fcntl.h */ - fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_CR_AND_WR_ONLY, 0); - /* VG_(printf)("result = %d\n", fd); */ - if (VG_(is_kerror)(fd)) { - fd = -1; - } else { - VG_(chmod_u_rw)(fd); - if (VG_(is_kerror)(fd)) { - fd = -1; - } - } - return fd; -} - -/* Returns -1 on failure. */ -Int VG_(open_write) ( Char* pathname ) -{ - Int fd; - - const int O_WRONLY_AND_TRUNC = 01001; /* See /usr/include/bits/fcntl.h */ - fd = vg_do_syscall3(__NR_open, (UInt)pathname, O_WRONLY_AND_TRUNC, 0); - /* VG_(printf)("result = %d\n", fd); */ - if (VG_(is_kerror)(fd)) { - fd = -1; - } - return fd; -} - -void VG_(close) ( Int fd ) -{ - vg_do_syscall1(__NR_close, fd); -} - - -Int VG_(read) ( Int fd, void* buf, Int count) -{ - Int res; - /* res = read( fd, buf, count ); */ - res = vg_do_syscall3(__NR_read, fd, (UInt)buf, count); - if (VG_(is_kerror)(res)) res = -1; - return res; -} - -Int VG_(write) ( Int fd, void* buf, Int count) -{ - Int res; - /* res = write( fd, buf, count ); */ - res = vg_do_syscall3(__NR_write, fd, (UInt)buf, count); - if (VG_(is_kerror)(res)) res = -1; - return res; -} - -Int VG_(stat) ( Char* file_name, struct vki_stat* buf ) -{ - Int res; - res = vg_do_syscall2(__NR_stat, (UInt)file_name, (UInt)buf); - return - VG_(is_kerror)(res) ? 
(-1) : 0; -} - -/* Misc functions looking for a proper home. */ - -/* We do getenv without libc's help by snooping around in - VG_(client_env) as determined at startup time. */ -Char* VG_(getenv) ( Char* varname ) -{ - Int i, n; - n = VG_(strlen)(varname); - for (i = 0; VG_(client_envp)[i] != NULL; i++) { - Char* s = VG_(client_envp)[i]; - if (VG_(strncmp)(varname, s, n) == 0 && s[n] == '=') { - return & s[n+1]; - } - } - return NULL; -} - -/* You'd be amazed how many places need to know the current pid. */ -Int VG_(getpid) ( void ) -{ - Int res; - /* res = getpid(); */ - res = vg_do_syscall0(__NR_getpid); - return res; -} - -/* Return -1 if error, else 0. NOTE does not indicate return code of - child! */ -Int VG_(system) ( Char* cmd ) -{ - Int pid, res; - void* environ[1] = { NULL }; - if (cmd == NULL) - return 1; - pid = vg_do_syscall0(__NR_fork); - if (VG_(is_kerror)(pid)) - return -1; - if (pid == 0) { - /* child */ - Char* argv[4]; - argv[0] = "/bin/sh"; - argv[1] = "-c"; - argv[2] = cmd; - argv[3] = 0; - (void)vg_do_syscall3(__NR_execve, - (UInt)"/bin/sh", (UInt)argv, (UInt)&environ); - /* If we're still alive here, execve failed. */ - return -1; - } else { - /* parent */ - res = vg_do_syscall3(__NR_waitpid, pid, (UInt)NULL, 0); - if (VG_(is_kerror)(res)) { - return -1; - } else { - return 0; - } - } -} - - -/* --------------------------------------------------------------------- - Support for a millisecond-granularity counter using RDTSC. 
- ------------------------------------------------------------------ */ - -static __inline__ ULong do_rdtsc_insn ( void ) -{ - ULong x; - __asm__ volatile (".byte 0x0f, 0x31" : "=A" (x)); - return x; -} - -/* 0 = pre-calibration, 1 = calibration, 2 = running */ -static Int rdtsc_calibration_state = 0; -static ULong rdtsc_ticks_per_millisecond = 0; /* invalid value */ - -static struct vki_timeval rdtsc_cal_start_timeval; -static struct vki_timeval rdtsc_cal_end_timeval; - -static ULong rdtsc_cal_start_raw; -static ULong rdtsc_cal_end_raw; - -UInt VG_(read_millisecond_timer) ( void ) -{ - ULong rdtsc_now; - vg_assert(rdtsc_calibration_state == 2); - rdtsc_now = do_rdtsc_insn(); - vg_assert(rdtsc_now > rdtsc_cal_end_raw); - rdtsc_now -= rdtsc_cal_end_raw; - rdtsc_now /= rdtsc_ticks_per_millisecond; - return (UInt)rdtsc_now; -} - - -void VG_(start_rdtsc_calibration) ( void ) -{ - Int res; - vg_assert(rdtsc_calibration_state == 0); - rdtsc_calibration_state = 1; - rdtsc_cal_start_raw = do_rdtsc_insn(); - res = vg_do_syscall2(__NR_gettimeofday, (UInt)&rdtsc_cal_start_timeval, - (UInt)NULL); - vg_assert(!VG_(is_kerror)(res)); -} - -void VG_(end_rdtsc_calibration) ( void ) -{ - Int res, loops; - ULong cpu_clock_MHZ; - ULong cal_clock_ticks; - ULong cal_wallclock_microseconds; - ULong wallclock_start_microseconds; - ULong wallclock_end_microseconds; - struct vki_timespec req; - struct vki_timespec rem; - - vg_assert(rdtsc_calibration_state == 1); - rdtsc_calibration_state = 2; - - /* Try and delay for 20 milliseconds, so that we can at least have - some minimum level of accuracy. */ - req.tv_sec = 0; - req.tv_nsec = 20 * 1000 * 1000; - loops = 0; - while (True) { - res = VG_(nanosleep)(&req, &rem); - vg_assert(res == 0 /*ok*/ || res == 1 /*interrupted*/); - if (res == 0) - break; - if (rem.tv_sec == 0 && rem.tv_nsec == 0) - break; - req = rem; - loops++; - if (loops > 100) - VG_(panic)("calibration nanosleep loop failed?!"); - } - - /* Now read both timers, and do the Math. 
*/ - rdtsc_cal_end_raw = do_rdtsc_insn(); - res = vg_do_syscall2(__NR_gettimeofday, (UInt)&rdtsc_cal_end_timeval, - (UInt)NULL); - - vg_assert(rdtsc_cal_end_raw > rdtsc_cal_start_raw); - cal_clock_ticks = rdtsc_cal_end_raw - rdtsc_cal_start_raw; - - wallclock_start_microseconds - = (1000000ULL * (ULong)(rdtsc_cal_start_timeval.tv_sec)) - + (ULong)(rdtsc_cal_start_timeval.tv_usec); - wallclock_end_microseconds - = (1000000ULL * (ULong)(rdtsc_cal_end_timeval.tv_sec)) - + (ULong)(rdtsc_cal_end_timeval.tv_usec); - vg_assert(wallclock_end_microseconds > wallclock_start_microseconds); - cal_wallclock_microseconds - = wallclock_end_microseconds - wallclock_start_microseconds; - - /* Since we just nanoslept for 20 ms ... */ - vg_assert(cal_wallclock_microseconds >= 20000); - - /* Now we know (roughly) that cal_clock_ticks on RDTSC take - cal_wallclock_microseconds elapsed time. Calculate the RDTSC - ticks-per-millisecond value. */ - if (0) - VG_(printf)("%lld ticks in %lld microseconds\n", - cal_clock_ticks, cal_wallclock_microseconds ); - - rdtsc_ticks_per_millisecond - = cal_clock_ticks / (cal_wallclock_microseconds / 1000ULL); - cpu_clock_MHZ - = (1000ULL * rdtsc_ticks_per_millisecond) / 1000000ULL; - if (VG_(clo_verbosity) >= 1) - VG_(message)(Vg_UserMsg, "Estimated CPU clock rate is %d MHz", - (UInt)cpu_clock_MHZ); - if (cpu_clock_MHZ < 50 || cpu_clock_MHZ > 10000) - VG_(panic)("end_rdtsc_calibration: " - "estimated CPU MHz outside range 50 .. 10000"); - /* Paranoia about division by zero later. */ - vg_assert(rdtsc_ticks_per_millisecond != 0); - if (0) - VG_(printf)("ticks per millisecond %llu\n", - rdtsc_ticks_per_millisecond); -} - - - -/* --------------------------------------------------------------------- - Primitive support for bagging memory via mmap. 
- ------------------------------------------------------------------ */ - -void* VG_(get_memory_from_mmap) ( Int nBytes, Char* who ) -{ - static UInt tot_alloc = 0; - void* p = VG_(mmap)( 0, nBytes, - VKI_PROT_READ|VKI_PROT_WRITE|VKI_PROT_EXEC, - VKI_MAP_PRIVATE|VKI_MAP_ANONYMOUS, -1, 0 ); - if (p != ((void*)(-1))) { - tot_alloc += (UInt)nBytes; - if (0) - VG_(printf)( - "get_memory_from_mmap: %d tot, %d req = %p .. %p, caller %s\n", - tot_alloc, nBytes, p, ((char*)p) + nBytes - 1, who ); - return p; - } - VG_(printf)("vg_get_memory_from_mmap failed on request of %d\n", - nBytes); - VG_(panic)("vg_get_memory_from_mmap: out of memory! Fatal! Bye!\n"); -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_mylibc.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_procselfmaps.c b/coregrind/vg_procselfmaps.c deleted file mode 100644 index ceba7b3bf0..0000000000 --- a/coregrind/vg_procselfmaps.c +++ /dev/null @@ -1,200 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A simple parser for /proc/self/maps on Linux 2.4.X ---*/ -/*--- vg_procselfmaps.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - - -#include "vg_include.h" - - -/* static ... to keep it out of the stack frame. */ - -static Char procmap_buf[M_PROCMAP_BUF]; - - -/* Helper fns. */ - -static Int hexdigit ( Char c ) -{ - if (c >= '0' && c <= '9') return (Int)(c - '0'); - if (c >= 'a' && c <= 'f') return 10 + (Int)(c - 'a'); - if (c >= 'A' && c <= 'F') return 10 + (Int)(c - 'A'); - return -1; -} - -static Int readchar ( Char* buf, Char* ch ) -{ - if (*buf == 0) return 0; - *ch = *buf; - return 1; -} - -static Int readhex ( Char* buf, UInt* val ) -{ - Int n = 0; - *val = 0; - while (hexdigit(*buf) >= 0) { - *val = (*val << 4) + hexdigit(*buf); - n++; buf++; - } - return n; -} - - - -/* Read /proc/self/maps. For each map entry, call - record_mapping, passing it, in this order: - - start address in memory - length - r permissions char; either - or r - w permissions char; either - or w - x permissions char; either - or x - offset in file, or zero if no file - filename, zero terminated, or NULL if no file - - So the sig of the called fn might be - - void (*record_mapping)( Addr start, UInt size, - Char r, Char w, Char x, - UInt foffset, UChar* filename ) - - Note that the supplied filename is transiently stored; record_mapping - should make a copy if it wants to keep it. - - If there's a syntax error or other failure, just abort. -*/ - -void VG_(read_procselfmaps) ( - void (*record_mapping)( Addr, UInt, Char, Char, Char, UInt, UChar* ) -) -{ - Int i, j, n_tot, n_chunk, fd, i_eol; - Addr start, endPlusOne; - UChar* filename; - UInt foffset; - UChar rr, ww, xx, pp, ch; - - /* Read the initial memory mapping from the /proc filesystem. 
*/ - fd = VG_(open_read) ( "/proc/self/maps" ); - if (fd == -1) { - VG_(message)(Vg_UserMsg, "FATAL: can't open /proc/self/maps"); - VG_(exit)(1); - } - n_tot = 0; - do { - n_chunk = VG_(read) ( fd, &procmap_buf[n_tot], M_PROCMAP_BUF - n_tot ); - n_tot += n_chunk; - } while ( n_chunk > 0 && n_tot < M_PROCMAP_BUF ); - VG_(close)(fd); - if (n_tot >= M_PROCMAP_BUF-5) { - VG_(message)(Vg_UserMsg, "FATAL: M_PROCMAP_BUF is too small; " - "increase it and recompile"); - VG_(exit)(1); - } - if (n_tot == 0) { - VG_(message)(Vg_UserMsg, "FATAL: I/O error on /proc/self/maps" ); - VG_(exit)(1); - } - procmap_buf[n_tot] = 0; - if (0) - VG_(message)(Vg_DebugMsg, "raw:\n%s", procmap_buf ); - - /* Ok, it's safely aboard. Parse the entries. */ - - i = 0; - while (True) { - if (i >= n_tot) break; - - /* Read (without fscanf :) the pattern %8x-%8x %c%c%c%c %8x */ - j = readhex(&procmap_buf[i], &start); - if (j > 0) i += j; else goto syntaxerror; - j = readchar(&procmap_buf[i], &ch); - if (j == 1 && ch == '-') i += j; else goto syntaxerror; - j = readhex(&procmap_buf[i], &endPlusOne); - if (j > 0) i += j; else goto syntaxerror; - - j = readchar(&procmap_buf[i], &ch); - if (j == 1 && ch == ' ') i += j; else goto syntaxerror; - - j = readchar(&procmap_buf[i], &rr); - if (j == 1 && (rr == 'r' || rr == '-')) i += j; else goto syntaxerror; - j = readchar(&procmap_buf[i], &ww); - if (j == 1 && (ww == 'w' || ww == '-')) i += j; else goto syntaxerror; - j = readchar(&procmap_buf[i], &xx); - if (j == 1 && (xx == 'x' || xx == '-')) i += j; else goto syntaxerror; - /* I haven't a clue what this last field means. 
*/ - j = readchar(&procmap_buf[i], &pp); - if (j == 1 && (pp == 'p' || pp == '-' || pp == 's')) - i += j; else goto syntaxerror; - - j = readchar(&procmap_buf[i], &ch); - if (j == 1 && ch == ' ') i += j; else goto syntaxerror; - - j = readhex(&procmap_buf[i], &foffset); - if (j > 0) i += j; else goto syntaxerror; - - goto read_line_ok; - - syntaxerror: - VG_(message)(Vg_UserMsg, "FATAL: syntax error reading /proc/self/maps"); - { Int k; - VG_(printf)("last 50 chars: `"); - for (k = i-50; k <= i; k++) VG_(printf)("%c", procmap_buf[k]); - VG_(printf)("'\n"); - } - VG_(exit)(1); - - read_line_ok: - /* Try and find the name of the file mapped to this segment, if - it exists. */ - while (procmap_buf[i] != '\n' && i < M_PROCMAP_BUF-1) i++; - i_eol = i; - i--; - while (!VG_(isspace)(procmap_buf[i]) && i >= 0) i--; - i++; - if (i < i_eol-1 && procmap_buf[i] == '/') { - filename = &procmap_buf[i]; - filename[i_eol - i] = '\0'; - } else { - filename = NULL; - foffset = 0; - } - - (*record_mapping) ( start, endPlusOne-start, - rr, ww, xx, - foffset, filename ); - - i = i_eol + 1; - } -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_procselfmaps.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_scheduler.c b/coregrind/vg_scheduler.c deleted file mode 100644 index 0ad56b10c5..0000000000 --- a/coregrind/vg_scheduler.c +++ /dev/null @@ -1,3426 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A user-space pthreads implementation. vg_scheduler.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. 
- - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" -#include "valgrind.h" /* for VG_USERREQ__MAKE_NOACCESS and - VG_USERREQ__DO_LEAK_CHECK */ - -/* BORKAGE/ISSUES as of 29 May 02 - -- Currently, when a signal is run, just the ThreadStatus.status fields - are saved in the signal frame, along with the CPU state. Question: - should I also save and restore: - ThreadStatus.joiner - ThreadStatus.waited_on_mid - ThreadStatus.awaken_at - ThreadStatus.retval - Currently unsure, and so am not doing so. - -- Signals interrupting read/write and nanosleep: SA_RESTART settings. - Read/write correctly return with EINTR when SA_RESTART isn't - specified and they are interrupted by a signal. nanosleep just - pretends signals don't exist -- should be fixed. - -- So, what's the deal with signals and mutexes? If a thread is - blocked on a mutex, or for a condition variable for that matter, can - signals still be delivered to it? This has serious consequences -- - deadlocks, etc. - -- Signals still not really right. Each thread should have its - own pending-set, but there is just one process-wide pending set. 
- - TODO for valgrind-1.0: - -- Update assertion checking in scheduler_sanity(). - - TODO sometime: - -- poll() in the vg_libpthread.c -- should it handle the nanosleep - being interrupted by a signal? Ditto accept? - -- Mutex scrubbing - clearup_after_thread_exit: look for threads - blocked on mutexes held by the exiting thread, and release them - appropriately. (??) - -- pthread_atfork - -*/ - - -/* --------------------------------------------------------------------- - Types and globals for the scheduler. - ------------------------------------------------------------------ */ - -/* type ThreadId is defined in vg_include.h. */ - -/* struct ThreadState is defined in vg_include.h. */ - -/* Globals. A statically allocated array of threads. NOTE: [0] is - never used, to simplify the simulation of initialisers for - LinuxThreads. */ -ThreadState VG_(threads)[VG_N_THREADS]; - -/* The process' fork-handler stack. */ -static Int vg_fhstack_used = 0; -static ForkHandlerEntry vg_fhstack[VG_N_FORKHANDLERSTACK]; - - -/* The tid of the thread currently in VG_(baseBlock). */ -static Int vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; - - -/* vg_oursignalhandler() might longjmp(). Here's the jmp_buf. */ -jmp_buf VG_(scheduler_jmpbuf); -/* This says whether scheduler_jmpbuf is actually valid. Needed so - that our signal handler doesn't longjmp when the buffer isn't - actually valid. */ -Bool VG_(scheduler_jmpbuf_valid) = False; -/* ... and if so, here's the signal which caused it to do so. */ -Int VG_(longjmpd_on_signal); - - -/* Machinery to keep track of which threads are waiting on which - fds. */ -typedef - struct { - /* The thread which made the request. */ - ThreadId tid; - - /* The next two fields describe the request. */ - /* File descriptor waited for. -1 means this slot is not in use */ - Int fd; - /* The syscall number the fd is used in. */ - Int syscall_no; - - /* False => still waiting for select to tell us the fd is ready - to go. 
True => the fd is ready, but the results have not yet - been delivered back to the calling thread. Once the latter - happens, this entire record is marked as no longer in use, by - making the fd field be -1. */ - Bool ready; - } - VgWaitedOnFd; - -static VgWaitedOnFd vg_waiting_fds[VG_N_WAITING_FDS]; - - -/* Keeping track of keys. */ -typedef - struct { - /* Has this key been allocated ? */ - Bool inuse; - /* If .inuse==True, records the address of the associated - destructor, or NULL if none. */ - void (*destructor)(void*); - } - ThreadKeyState; - -/* And our array of thread keys. */ -static ThreadKeyState vg_thread_keys[VG_N_THREAD_KEYS]; - -typedef UInt ThreadKey; - - -/* Forwards */ -static void do_client_request ( ThreadId tid ); -static void scheduler_sanity ( void ); -static void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid ); - - -/* --------------------------------------------------------------------- - Helper functions for the scheduler. - ------------------------------------------------------------------ */ - -__inline__ -Bool VG_(is_valid_tid) ( ThreadId tid ) -{ - /* tid is unsigned, hence no < 0 test. */ - if (tid == 0) return False; - if (tid >= VG_N_THREADS) return False; - if (VG_(threads)[tid].status == VgTs_Empty) return False; - return True; -} - - -__inline__ -Bool VG_(is_valid_or_empty_tid) ( ThreadId tid ) -{ - /* tid is unsigned, hence no < 0 test. */ - if (tid == 0) return False; - if (tid >= VG_N_THREADS) return False; - return True; -} - - -/* For constructing error messages only: try and identify a thread - whose stack this address currently falls within, or return - VG_INVALID_THREADID if it doesn't. A small complication is dealing - with any currently VG_(baseBlock)-resident thread. -*/ -ThreadId VG_(identify_stack_addr)( Addr a ) -{ - ThreadId tid, tid_to_skip; - - tid_to_skip = VG_INVALID_THREADID; - - /* First check to see if there's a currently-loaded thread in - VG_(baseBlock). 
*/ - if (vg_tid_currently_in_baseBlock != VG_INVALID_THREADID) { - tid = vg_tid_currently_in_baseBlock; - if (VG_(baseBlock)[VGOFF_(m_esp)] <= a - && a <= VG_(threads)[tid].stack_highest_word) - return tid; - else - tid_to_skip = tid; - } - - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status == VgTs_Empty) continue; - if (tid == tid_to_skip) continue; - if (VG_(threads)[tid].m_esp <= a - && a <= VG_(threads)[tid].stack_highest_word) - return tid; - } - return VG_INVALID_THREADID; -} - - -/* Print the scheduler status. */ -void VG_(pp_sched_status) ( void ) -{ - Int i; - VG_(printf)("\nsched status:\n"); - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status == VgTs_Empty) continue; - VG_(printf)("\nThread %d: status = ", i); - switch (VG_(threads)[i].status) { - case VgTs_Runnable: VG_(printf)("Runnable"); break; - case VgTs_WaitFD: VG_(printf)("WaitFD"); break; - case VgTs_WaitJoinee: VG_(printf)("WaitJoinee(%d)", - VG_(threads)[i].joiner_jee_tid); - break; - case VgTs_WaitJoiner: VG_(printf)("WaitJoiner"); break; - case VgTs_Sleeping: VG_(printf)("Sleeping"); break; - case VgTs_WaitMX: VG_(printf)("WaitMX"); break; - case VgTs_WaitCV: VG_(printf)("WaitCV"); break; - case VgTs_WaitSIG: VG_(printf)("WaitSIG"); break; - default: VG_(printf)("???"); break; - } - VG_(printf)(", associated_mx = %p, associated_cv = %p\n", - VG_(threads)[i].associated_mx, - VG_(threads)[i].associated_cv ); - VG_(pp_ExeContext)( - VG_(get_ExeContext)( False, VG_(threads)[i].m_eip, - VG_(threads)[i].m_ebp )); - } - VG_(printf)("\n"); -} - -static -void add_waiting_fd ( ThreadId tid, Int fd, Int syscall_no ) -{ - Int i; - - vg_assert(fd != -1); /* avoid total chaos */ - - for (i = 0; i < VG_N_WAITING_FDS; i++) - if (vg_waiting_fds[i].fd == -1) - break; - - if (i == VG_N_WAITING_FDS) - VG_(panic)("add_waiting_fd: VG_N_WAITING_FDS is too low"); - /* - VG_(printf)("add_waiting_fd: add (tid %d, fd %d) at slot %d\n", - tid, fd, i); - */ - vg_waiting_fds[i].fd 
= fd; - vg_waiting_fds[i].tid = tid; - vg_waiting_fds[i].ready = False; - vg_waiting_fds[i].syscall_no = syscall_no; -} - - - -static -void print_sched_event ( ThreadId tid, Char* what ) -{ - VG_(message)(Vg_DebugMsg, " SCHED[%d]: %s", tid, what ); -} - - -static -void print_pthread_event ( ThreadId tid, Char* what ) -{ - VG_(message)(Vg_DebugMsg, "PTHREAD[%d]: %s", tid, what ); -} - - -static -Char* name_of_sched_event ( UInt event ) -{ - switch (event) { - case VG_TRC_EBP_JMP_SYSCALL: return "SYSCALL"; - case VG_TRC_EBP_JMP_CLIENTREQ: return "CLIENTREQ"; - case VG_TRC_INNER_COUNTERZERO: return "COUNTERZERO"; - case VG_TRC_INNER_FASTMISS: return "FASTMISS"; - case VG_TRC_UNRESUMABLE_SIGNAL: return "FATALSIGNAL"; - default: return "??UNKNOWN??"; - } -} - - -/* Create a translation of the client basic block beginning at - orig_addr, and add it to the translation cache & translation table. - This probably doesn't really belong here, but, hey ... -*/ -static -void create_translation_for ( ThreadId tid, Addr orig_addr ) -{ - Addr trans_addr; - TTEntry tte; - Int orig_size, trans_size; - /* Ensure there is space to hold a translation. */ - VG_(maybe_do_lru_pass)(); - VG_(translate)( &VG_(threads)[tid], - orig_addr, &orig_size, &trans_addr, &trans_size ); - /* Copy data at trans_addr into the translation cache. - Returned pointer is to the code, not to the 4-byte - header. */ - /* Since the .orig_size and .trans_size fields are - UShort, be paranoid. */ - vg_assert(orig_size > 0 && orig_size < 65536); - vg_assert(trans_size > 0 && trans_size < 65536); - tte.orig_size = orig_size; - tte.orig_addr = orig_addr; - tte.trans_size = trans_size; - tte.trans_addr = VG_(copy_to_transcache) - ( trans_addr, trans_size ); - tte.mru_epoch = VG_(current_epoch); - /* Free the intermediary -- was allocated by VG_(emit_code). */ - VG_(jitfree)( (void*)trans_addr ); - /* Add to trans tab and set back pointer. */ - VG_(add_to_trans_tab) ( &tte ); - /* Update stats. 
*/ - VG_(this_epoch_in_count) ++; - VG_(this_epoch_in_osize) += orig_size; - VG_(this_epoch_in_tsize) += trans_size; - VG_(overall_in_count) ++; - VG_(overall_in_osize) += orig_size; - VG_(overall_in_tsize) += trans_size; -} - - -/* Allocate a completely empty ThreadState record. */ -static -ThreadId vg_alloc_ThreadState ( void ) -{ - Int i; - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status == VgTs_Empty) - return i; - } - VG_(printf)("vg_alloc_ThreadState: no free slots available\n"); - VG_(printf)("Increase VG_N_THREADS, rebuild and try again.\n"); - VG_(panic)("VG_N_THREADS is too low"); - /*NOTREACHED*/ -} - - -ThreadState* VG_(get_current_thread_state) ( void ) -{ - vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock)); - return & VG_(threads)[vg_tid_currently_in_baseBlock]; -} - - -ThreadId VG_(get_current_tid) ( void ) -{ - vg_assert(VG_(is_valid_tid)(vg_tid_currently_in_baseBlock)); - return vg_tid_currently_in_baseBlock; -} - - -/* Copy the saved state of a thread into VG_(baseBlock), ready for it - to be run. 
*/
__inline__
void VG_(load_thread_state) ( ThreadId tid )
{
   Int          w;
   ThreadState* tst = &VG_(threads)[tid];

   /* VG_(baseBlock) must not already hold some thread's state. */
   vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID);

   /* The integer registers, %eflags and %eip. */
   VG_(baseBlock)[VGOFF_(m_eax)]    = tst->m_eax;
   VG_(baseBlock)[VGOFF_(m_ebx)]    = tst->m_ebx;
   VG_(baseBlock)[VGOFF_(m_ecx)]    = tst->m_ecx;
   VG_(baseBlock)[VGOFF_(m_edx)]    = tst->m_edx;
   VG_(baseBlock)[VGOFF_(m_esi)]    = tst->m_esi;
   VG_(baseBlock)[VGOFF_(m_edi)]    = tst->m_edi;
   VG_(baseBlock)[VGOFF_(m_ebp)]    = tst->m_ebp;
   VG_(baseBlock)[VGOFF_(m_esp)]    = tst->m_esp;
   VG_(baseBlock)[VGOFF_(m_eflags)] = tst->m_eflags;
   VG_(baseBlock)[VGOFF_(m_eip)]    = tst->m_eip;

   /* The FPU state, a word at a time. */
   for (w = 0; w < VG_SIZE_OF_FPUSTATE_W; w++)
      VG_(baseBlock)[VGOFF_(m_fpustate) + w] = tst->m_fpu[w];

   /* The sh_ counterparts of the integer registers and %eflags. */
   VG_(baseBlock)[VGOFF_(sh_eax)]    = tst->sh_eax;
   VG_(baseBlock)[VGOFF_(sh_ebx)]    = tst->sh_ebx;
   VG_(baseBlock)[VGOFF_(sh_ecx)]    = tst->sh_ecx;
   VG_(baseBlock)[VGOFF_(sh_edx)]    = tst->sh_edx;
   VG_(baseBlock)[VGOFF_(sh_esi)]    = tst->sh_esi;
   VG_(baseBlock)[VGOFF_(sh_edi)]    = tst->sh_edi;
   VG_(baseBlock)[VGOFF_(sh_ebp)]    = tst->sh_ebp;
   VG_(baseBlock)[VGOFF_(sh_esp)]    = tst->sh_esp;
   VG_(baseBlock)[VGOFF_(sh_eflags)] = tst->sh_eflags;

   /* The baseBlock now belongs to tid. */
   vg_tid_currently_in_baseBlock = tid;
}


/* Copy the state of a thread from VG_(baseBlock), presumably after it
   has been descheduled.  For sanity-check purposes, fill the vacated
   VG_(baseBlock) with garbage so as to make the system more likely to
   fail quickly if we erroneously continue to poke around inside
   VG_(baseBlock) without first doing a load_thread_state().
-*/ -__inline__ -void VG_(save_thread_state) ( ThreadId tid ) -{ - Int i; - const UInt junk = 0xDEADBEEF; - - vg_assert(vg_tid_currently_in_baseBlock != VG_INVALID_THREADID); - - VG_(threads)[tid].m_eax = VG_(baseBlock)[VGOFF_(m_eax)]; - VG_(threads)[tid].m_ebx = VG_(baseBlock)[VGOFF_(m_ebx)]; - VG_(threads)[tid].m_ecx = VG_(baseBlock)[VGOFF_(m_ecx)]; - VG_(threads)[tid].m_edx = VG_(baseBlock)[VGOFF_(m_edx)]; - VG_(threads)[tid].m_esi = VG_(baseBlock)[VGOFF_(m_esi)]; - VG_(threads)[tid].m_edi = VG_(baseBlock)[VGOFF_(m_edi)]; - VG_(threads)[tid].m_ebp = VG_(baseBlock)[VGOFF_(m_ebp)]; - VG_(threads)[tid].m_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - VG_(threads)[tid].m_eflags = VG_(baseBlock)[VGOFF_(m_eflags)]; - VG_(threads)[tid].m_eip = VG_(baseBlock)[VGOFF_(m_eip)]; - - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(threads)[tid].m_fpu[i] = VG_(baseBlock)[VGOFF_(m_fpustate) + i]; - - VG_(threads)[tid].sh_eax = VG_(baseBlock)[VGOFF_(sh_eax)]; - VG_(threads)[tid].sh_ebx = VG_(baseBlock)[VGOFF_(sh_ebx)]; - VG_(threads)[tid].sh_ecx = VG_(baseBlock)[VGOFF_(sh_ecx)]; - VG_(threads)[tid].sh_edx = VG_(baseBlock)[VGOFF_(sh_edx)]; - VG_(threads)[tid].sh_esi = VG_(baseBlock)[VGOFF_(sh_esi)]; - VG_(threads)[tid].sh_edi = VG_(baseBlock)[VGOFF_(sh_edi)]; - VG_(threads)[tid].sh_ebp = VG_(baseBlock)[VGOFF_(sh_ebp)]; - VG_(threads)[tid].sh_esp = VG_(baseBlock)[VGOFF_(sh_esp)]; - VG_(threads)[tid].sh_eflags = VG_(baseBlock)[VGOFF_(sh_eflags)]; - - /* Fill it up with junk. 
*/ - VG_(baseBlock)[VGOFF_(m_eax)] = junk; - VG_(baseBlock)[VGOFF_(m_ebx)] = junk; - VG_(baseBlock)[VGOFF_(m_ecx)] = junk; - VG_(baseBlock)[VGOFF_(m_edx)] = junk; - VG_(baseBlock)[VGOFF_(m_esi)] = junk; - VG_(baseBlock)[VGOFF_(m_edi)] = junk; - VG_(baseBlock)[VGOFF_(m_ebp)] = junk; - VG_(baseBlock)[VGOFF_(m_esp)] = junk; - VG_(baseBlock)[VGOFF_(m_eflags)] = junk; - VG_(baseBlock)[VGOFF_(m_eip)] = junk; - - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - VG_(baseBlock)[VGOFF_(m_fpustate) + i] = junk; - - vg_tid_currently_in_baseBlock = VG_INVALID_THREADID; -} - - -/* Run the thread tid for a while, and return a VG_TRC_* value to the - scheduler indicating what happened. */ -static -UInt run_thread_for_a_while ( ThreadId tid ) -{ - volatile UInt trc = 0; - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - vg_assert(VG_(bbs_to_go) > 0); - vg_assert(!VG_(scheduler_jmpbuf_valid)); - - VGP_PUSHCC(VgpRun); - VG_(load_thread_state) ( tid ); - if (__builtin_setjmp(VG_(scheduler_jmpbuf)) == 0) { - /* try this ... */ - VG_(scheduler_jmpbuf_valid) = True; - trc = VG_(run_innerloop)(); - VG_(scheduler_jmpbuf_valid) = False; - /* We get here if the client didn't take a fault. */ - } else { - /* We get here if the client took a fault, which caused our - signal handler to longjmp. */ - VG_(scheduler_jmpbuf_valid) = False; - vg_assert(trc == 0); - trc = VG_TRC_UNRESUMABLE_SIGNAL; - } - - vg_assert(!VG_(scheduler_jmpbuf_valid)); - - VG_(save_thread_state) ( tid ); - VGP_POPCC; - return trc; -} - - -/* Increment the LRU epoch counter. 
*/ -static -void increment_epoch ( void ) -{ - VG_(current_epoch)++; - if (VG_(clo_verbosity) > 2) { - UInt tt_used, tc_used; - VG_(get_tt_tc_used) ( &tt_used, &tc_used ); - VG_(message)(Vg_UserMsg, - "%lu bbs, in: %d (%d -> %d), out %d (%d -> %d), TT %d, TC %d", - VG_(bbs_done), - VG_(this_epoch_in_count), - VG_(this_epoch_in_osize), - VG_(this_epoch_in_tsize), - VG_(this_epoch_out_count), - VG_(this_epoch_out_osize), - VG_(this_epoch_out_tsize), - tt_used, tc_used - ); - } - VG_(this_epoch_in_count) = 0; - VG_(this_epoch_in_osize) = 0; - VG_(this_epoch_in_tsize) = 0; - VG_(this_epoch_out_count) = 0; - VG_(this_epoch_out_osize) = 0; - VG_(this_epoch_out_tsize) = 0; -} - - -static -void mostly_clear_thread_record ( ThreadId tid ) -{ - Int j; - vg_assert(tid >= 0 && tid < VG_N_THREADS); - VG_(threads)[tid].tid = tid; - VG_(threads)[tid].status = VgTs_Empty; - VG_(threads)[tid].associated_mx = NULL; - VG_(threads)[tid].associated_cv = NULL; - VG_(threads)[tid].awaken_at = 0; - VG_(threads)[tid].joinee_retval = NULL; - VG_(threads)[tid].joiner_thread_return = NULL; - VG_(threads)[tid].joiner_jee_tid = VG_INVALID_THREADID; - VG_(threads)[tid].detached = False; - VG_(threads)[tid].cancel_st = True; /* PTHREAD_CANCEL_ENABLE */ - VG_(threads)[tid].cancel_ty = True; /* PTHREAD_CANCEL_DEFERRED */ - VG_(threads)[tid].cancel_pend = NULL; /* not pending */ - VG_(threads)[tid].custack_used = 0; - VG_(threads)[tid].n_signals_returned = 0; - VG_(ksigemptyset)(&VG_(threads)[tid].sig_mask); - VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for); - for (j = 0; j < VG_N_THREAD_KEYS; j++) - VG_(threads)[tid].specifics[j] = NULL; -} - - -/* Initialise the scheduler. Create a single "main" thread ready to - run, with special ThreadId of one. This is called at startup; the - caller takes care to park the client's state is parked in - VG_(baseBlock). 
-*/ -void VG_(scheduler_init) ( void ) -{ - Int i; - Addr startup_esp; - ThreadId tid_main; - - startup_esp = VG_(baseBlock)[VGOFF_(m_esp)]; - - if (VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_1) - || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_2) - || VG_STACK_MATCHES_BASE(startup_esp, VG_STARTUP_STACK_BASE_3)) { - /* Jolly good! */ - } else { - VG_(printf)("%%esp at startup = %p is not near %p, %p or %p; aborting\n", - (void*)startup_esp, - (void*)VG_STARTUP_STACK_BASE_1, - (void*)VG_STARTUP_STACK_BASE_2, - (void*)VG_STARTUP_STACK_BASE_3 ); - VG_(panic)("unexpected %esp at startup"); - } - - for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) { - mostly_clear_thread_record(i); - VG_(threads)[i].stack_size = 0; - VG_(threads)[i].stack_base = (Addr)NULL; - VG_(threads)[i].stack_highest_word = (Addr)NULL; - } - - for (i = 0; i < VG_N_WAITING_FDS; i++) - vg_waiting_fds[i].fd = -1; /* not in use */ - - for (i = 0; i < VG_N_THREAD_KEYS; i++) { - vg_thread_keys[i].inuse = False; - vg_thread_keys[i].destructor = NULL; - } - - vg_fhstack_used = 0; - - /* Assert this is thread zero, which has certain magic - properties. */ - tid_main = vg_alloc_ThreadState(); - vg_assert(tid_main == 1); - VG_(threads)[tid_main].status = VgTs_Runnable; - - /* Copy VG_(baseBlock) state to tid_main's slot. */ - vg_tid_currently_in_baseBlock = tid_main; - VG_(save_thread_state) ( tid_main ); - - VG_(threads)[tid_main].stack_highest_word - = VG_(threads)[tid_main].m_esp /* -4 ??? */; - - /* So now ... */ - vg_assert(vg_tid_currently_in_baseBlock == VG_INVALID_THREADID); - - /* Not running client code right now. */ - VG_(scheduler_jmpbuf_valid) = False; -} - - -/* What if fd isn't a valid fd? 
*/ -static -void set_fd_nonblocking ( Int fd ) -{ - Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); - vg_assert(!VG_(is_kerror)(res)); - res |= VKI_O_NONBLOCK; - res = VG_(fcntl)( fd, VKI_F_SETFL, res ); - vg_assert(!VG_(is_kerror)(res)); -} - -static -void set_fd_blocking ( Int fd ) -{ - Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); - vg_assert(!VG_(is_kerror)(res)); - res &= ~VKI_O_NONBLOCK; - res = VG_(fcntl)( fd, VKI_F_SETFL, res ); - vg_assert(!VG_(is_kerror)(res)); -} - -static -Bool fd_is_blockful ( Int fd ) -{ - Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); - vg_assert(!VG_(is_kerror)(res)); - return (res & VKI_O_NONBLOCK) ? False : True; -} - -static -Bool fd_is_valid ( Int fd ) -{ - Int res = VG_(fcntl)( fd, VKI_F_GETFL, 0 ); - return VG_(is_kerror)(res) ? False : True; -} - - - -/* vthread tid is returning from a signal handler; modify its - stack/regs accordingly. */ - -/* [Helper fn for handle_signal_return] tid, assumed to be in WaitFD - for read or write, has been interrupted by a signal. Find and - clear the relevant vg_waiting_fd[] entry. Most of the code in this - procedure is total paranoia, if you look closely. */ -static -void cleanup_waiting_fd_table ( ThreadId tid ) -{ - Int i, waiters; - - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_WaitFD); - vg_assert(VG_(threads)[tid].m_eax == __NR_read - || VG_(threads)[tid].m_eax == __NR_write); - - /* Excessively paranoidly ... find the fd this op was waiting - for, and mark it as not being waited on. 
*/ - waiters = 0; - for (i = 0; i < VG_N_WAITING_FDS; i++) { - if (vg_waiting_fds[i].tid == tid) { - waiters++; - vg_assert(vg_waiting_fds[i].syscall_no == VG_(threads)[tid].m_eax); - } - } - vg_assert(waiters == 1); - for (i = 0; i < VG_N_WAITING_FDS; i++) - if (vg_waiting_fds[i].tid == tid) - break; - vg_assert(i < VG_N_WAITING_FDS); - vg_assert(vg_waiting_fds[i].fd != -1); - vg_waiting_fds[i].fd = -1; /* not in use */ -} - - -static -void handle_signal_return ( ThreadId tid ) -{ - Char msg_buf[100]; - Bool restart_blocked_syscalls; - struct vki_timespec * rem; - - vg_assert(VG_(is_valid_tid)(tid)); - - /* Increment signal-returned counter. Used only to implement - pause(). */ - VG_(threads)[tid].n_signals_returned++; - - restart_blocked_syscalls = VG_(signal_returns)(tid); - - if (restart_blocked_syscalls) - /* Easy; we don't have to do anything. */ - return; - - if (VG_(threads)[tid].status == VgTs_WaitFD - && (VG_(threads)[tid].m_eax == __NR_read - || VG_(threads)[tid].m_eax == __NR_write)) { - /* read() or write() interrupted. Force a return with EINTR. */ - cleanup_waiting_fd_table(tid); - VG_(threads)[tid].m_eax = -VKI_EINTR; - VG_(threads)[tid].status = VgTs_Runnable; - - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "read() / write() interrupted by signal; return EINTR" ); - print_sched_event(tid, msg_buf); - } - return; - } - - if (VG_(threads)[tid].status == VgTs_Sleeping - && VG_(threads)[tid].m_eax == __NR_nanosleep) { - /* We interrupted a nanosleep(). The right thing to do is to - write the unused time to nanosleep's second param, but that's - too much effort ... we just say that 1 nanosecond was not - used, and return EINTR. 
*/ - rem = (struct vki_timespec *)VG_(threads)[tid].m_ecx; /* arg2 */ - if (rem != NULL) { - rem->tv_sec = 0; - rem->tv_nsec = 1; - } - SET_EAX(tid, -VKI_EINTR); - VG_(threads)[tid].status = VgTs_Runnable; - return; - } - - if (VG_(threads)[tid].status == VgTs_WaitFD) { - VG_(panic)("handle_signal_return: unknown interrupted syscall"); - } - - /* All other cases? Just return. */ -} - - -static -void sched_do_syscall ( ThreadId tid ) -{ - UInt saved_eax; - UInt res, syscall_no; - UInt fd; - Bool orig_fd_blockness; - Char msg_buf[100]; - - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - - syscall_no = VG_(threads)[tid].m_eax; /* syscall number */ - - if (syscall_no == __NR_nanosleep) { - UInt t_now, t_awaken; - struct vki_timespec* req; - req = (struct vki_timespec*)VG_(threads)[tid].m_ebx; /* arg1 */ - t_now = VG_(read_millisecond_timer)(); - t_awaken - = t_now - + (UInt)1000ULL * (UInt)(req->tv_sec) - + (UInt)(req->tv_nsec) / 1000000; - VG_(threads)[tid].status = VgTs_Sleeping; - VG_(threads)[tid].awaken_at = t_awaken; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "at %d: nanosleep for %d", - t_now, t_awaken-t_now); - print_sched_event(tid, msg_buf); - } - /* Force the scheduler to run something else for a while. */ - return; - } - - if (syscall_no != __NR_read && syscall_no != __NR_write) { - /* We think it's non-blocking. Just do it in the normal way. */ - VG_(perform_assumed_nonblocking_syscall)(tid); - /* The thread is still runnable. */ - return; - } - - /* Set the fd to nonblocking, and do the syscall, which will return - immediately, in order to lodge a request with the Linux kernel. - We later poll for I/O completion using select(). */ - - fd = VG_(threads)[tid].m_ebx /* arg1 */; - - /* Deal with error case immediately. */ - if (!fd_is_valid(fd)) { - VG_(message)(Vg_UserMsg, - "Warning: invalid file descriptor %d in syscall %s", - fd, syscall_no == __NR_read ? 
"read()" : "write()" ); - VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); - KERNEL_DO_SYSCALL(tid, res); - VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); - /* We're still runnable. */ - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - return; - } - - /* From here onwards we know that fd is valid. */ - - orig_fd_blockness = fd_is_blockful(fd); - set_fd_nonblocking(fd); - vg_assert(!fd_is_blockful(fd)); - VG_(check_known_blocking_syscall)(tid, syscall_no, NULL /* PRE */); - - /* This trashes the thread's %eax; we have to preserve it. */ - saved_eax = VG_(threads)[tid].m_eax; - KERNEL_DO_SYSCALL(tid,res); - - /* Restore original blockfulness of the fd. */ - if (orig_fd_blockness) - set_fd_blocking(fd); - else - set_fd_nonblocking(fd); - - if (res != -VKI_EWOULDBLOCK || !orig_fd_blockness) { - /* Finish off in the normal way. Don't restore %EAX, since that - now (correctly) holds the result of the call. We get here if either: - 1. The call didn't block, or - 2. The fd was already in nonblocking mode before we started to - mess with it. In this case, we're not expecting to handle - the I/O completion -- the client is. So don't file a - completion-wait entry. - */ - VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); - /* We're still runnable. */ - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - - } else { - - vg_assert(res == -VKI_EWOULDBLOCK && orig_fd_blockness); - - /* It would have blocked. First, restore %EAX to what it was - before our speculative call. */ - VG_(threads)[tid].m_eax = saved_eax; - /* Put this fd in a table of fds on which we are waiting for - completion. The arguments for select() later are constructed - from this table. */ - add_waiting_fd(tid, fd, saved_eax /* which holds the syscall # */); - /* Deschedule thread until an I/O completion happens. 
*/ - VG_(threads)[tid].status = VgTs_WaitFD; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf,"block until I/O ready on fd %d", fd); - print_sched_event(tid, msg_buf); - } - - } -} - - -/* Find out which of the fds in vg_waiting_fds are now ready to go, by - making enquiries with select(), and mark them as ready. We have to - wait for the requesting threads to fall into the the WaitFD state - before we can actually finally deliver the results, so this - procedure doesn't do that; complete_blocked_syscalls() does it. - - It might seem odd that a thread which has done a blocking syscall - is not in WaitFD state; the way this can happen is if it initially - becomes WaitFD, but then a signal is delivered to it, so it becomes - Runnable for a while. In this case we have to wait for the - sighandler to return, whereupon the WaitFD state is resumed, and - only at that point can the I/O result be delivered to it. However, - this point may be long after the fd is actually ready. - - So, poll_for_ready_fds() merely detects fds which are ready. - complete_blocked_syscalls() does the second half of the trick, - possibly much later: it delivers the results from ready fds to - threads in WaitFD state. -*/ -static -void poll_for_ready_fds ( void ) -{ - vki_ksigset_t saved_procmask; - vki_fd_set readfds; - vki_fd_set writefds; - vki_fd_set exceptfds; - struct vki_timeval timeout; - Int fd, fd_max, i, n_ready, syscall_no, n_ok; - ThreadId tid; - Bool rd_ok, wr_ok, ex_ok; - Char msg_buf[100]; - - struct vki_timespec* rem; - UInt t_now; - - /* Awaken any sleeping threads whose sleep has expired. */ - for (tid = 1; tid < VG_N_THREADS; tid++) - if (VG_(threads)[tid].status == VgTs_Sleeping) - break; - - /* Avoid pointless calls to VG_(read_millisecond_timer). 
*/ - if (tid < VG_N_THREADS) { - t_now = VG_(read_millisecond_timer)(); - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status != VgTs_Sleeping) - continue; - if (t_now >= VG_(threads)[tid].awaken_at) { - /* Resume this thread. Set to zero the remaining-time - (second) arg of nanosleep, since it's used up all its - time. */ - vg_assert(VG_(threads)[tid].m_eax == __NR_nanosleep); - rem = (struct vki_timespec *)VG_(threads)[tid].m_ecx; /* arg2 */ - if (rem != NULL) { - rem->tv_sec = 0; - rem->tv_nsec = 0; - } - /* Make the syscall return 0 (success). */ - VG_(threads)[tid].m_eax = 0; - /* Reschedule this thread. */ - VG_(threads)[tid].status = VgTs_Runnable; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "at %d: nanosleep done", - t_now); - print_sched_event(tid, msg_buf); - } - } - } - } - - /* And look for threads waiting on file descriptors which are now - ready for I/O.*/ - timeout.tv_sec = 0; - timeout.tv_usec = 0; - - VKI_FD_ZERO(&readfds); - VKI_FD_ZERO(&writefds); - VKI_FD_ZERO(&exceptfds); - fd_max = -1; - for (i = 0; i < VG_N_WAITING_FDS; i++) { - if (vg_waiting_fds[i].fd == -1 /* not in use */) - continue; - if (vg_waiting_fds[i].ready /* already ready? */) - continue; - fd = vg_waiting_fds[i].fd; - /* VG_(printf)("adding QUERY for fd %d\n", fd); */ - vg_assert(fd >= 0); - if (fd > fd_max) - fd_max = fd; - tid = vg_waiting_fds[i].tid; - vg_assert(VG_(is_valid_tid)(tid)); - syscall_no = vg_waiting_fds[i].syscall_no; - switch (syscall_no) { - case __NR_read: - /* In order to catch timeout events on fds which are - readable and which have been ioctl(TCSETA)'d with a - VTIMEout, we appear to need to ask if the fd is - writable, for some reason. Ask me not why. Since this - is strange and potentially troublesome we only do it if - the user asks specially. 
*/ - if (VG_(strstr)(VG_(clo_weird_hacks), "ioctl-VTIME") != NULL) - VKI_FD_SET(fd, &writefds); - VKI_FD_SET(fd, &readfds); break; - case __NR_write: - VKI_FD_SET(fd, &writefds); break; - default: - VG_(panic)("poll_for_ready_fds: unexpected syscall"); - /*NOTREACHED*/ - break; - } - } - - /* Short cut: if no fds are waiting, give up now. */ - if (fd_max == -1) - return; - - /* BLOCK ALL SIGNALS. We don't want the complication of select() - getting interrupted. */ - VG_(block_all_host_signals)( &saved_procmask ); - - n_ready = VG_(select) - ( fd_max+1, &readfds, &writefds, &exceptfds, &timeout); - if (VG_(is_kerror)(n_ready)) { - VG_(printf)("poll_for_ready_fds: select returned %d\n", n_ready); - VG_(panic)("poll_for_ready_fds: select failed?!"); - /*NOTREACHED*/ - } - - /* UNBLOCK ALL SIGNALS */ - VG_(restore_all_host_signals)( &saved_procmask ); - - /* VG_(printf)("poll_for_io_completions: %d fs ready\n", n_ready); */ - - if (n_ready == 0) - return; - - /* Inspect all the fds we know about, and handle any completions that - have happened. */ - /* - VG_(printf)("\n\n"); - for (fd = 0; fd < 100; fd++) - if (VKI_FD_ISSET(fd, &writefds) || VKI_FD_ISSET(fd, &readfds)) { - VG_(printf)("X"); } else { VG_(printf)("."); }; - VG_(printf)("\n\nfd_max = %d\n", fd_max); - */ - - for (fd = 0; fd <= fd_max; fd++) { - rd_ok = VKI_FD_ISSET(fd, &readfds); - wr_ok = VKI_FD_ISSET(fd, &writefds); - ex_ok = VKI_FD_ISSET(fd, &exceptfds); - - n_ok = (rd_ok ? 1 : 0) + (wr_ok ? 1 : 0) + (ex_ok ? 1 : 0); - if (n_ok == 0) - continue; - if (n_ok > 1) { - VG_(printf)("offending fd = %d\n", fd); - VG_(panic)("poll_for_ready_fds: multiple events on fd"); - } - - /* An I/O event completed for fd. Find the thread which - requested this. */ - for (i = 0; i < VG_N_WAITING_FDS; i++) { - if (vg_waiting_fds[i].fd == -1 /* not in use */) - continue; - if (vg_waiting_fds[i].fd == fd) - break; - } - - /* And a bit more paranoia ... 
*/ - vg_assert(i >= 0 && i < VG_N_WAITING_FDS); - - /* Mark the fd as ready. */ - vg_assert(! vg_waiting_fds[i].ready); - vg_waiting_fds[i].ready = True; - } -} - - -/* See comment attached to poll_for_ready_fds() for explaination. */ -static -void complete_blocked_syscalls ( void ) -{ - Int fd, i, res, syscall_no; - ThreadId tid; - Char msg_buf[100]; - - /* Inspect all the outstanding fds we know about. */ - - for (i = 0; i < VG_N_WAITING_FDS; i++) { - if (vg_waiting_fds[i].fd == -1 /* not in use */) - continue; - if (! vg_waiting_fds[i].ready) - continue; - - fd = vg_waiting_fds[i].fd; - tid = vg_waiting_fds[i].tid; - vg_assert(VG_(is_valid_tid)(tid)); - - /* The thread actually has to be waiting for the I/O event it - requested before we can deliver the result! */ - if (VG_(threads)[tid].status != VgTs_WaitFD) - continue; - - /* Ok, actually do it! We can safely use %EAX as the syscall - number, because the speculative call made by - sched_do_syscall() doesn't change %EAX in the case where the - call would have blocked. */ - syscall_no = vg_waiting_fds[i].syscall_no; - vg_assert(syscall_no == VG_(threads)[tid].m_eax); - - /* In a rare case pertaining to writing into a pipe, write() - will block when asked to write > 4096 bytes even though the - kernel claims, when asked via select(), that blocking will - not occur for a write on that fd. This can cause deadlocks. - An easy answer is to limit the size of the write to 4096 - anyway and hope that the client program's logic can handle - the short write. That shoulds dubious to me, so we don't do - it by default. */ - if (syscall_no == __NR_write - && VG_(threads)[tid].m_edx /* arg3, count */ > 4096 - && VG_(strstr)(VG_(clo_weird_hacks), "truncate-writes") != NULL) { - /* VG_(printf)("truncate write from %d to 4096\n", - VG_(threads)[tid].m_edx ); */ - VG_(threads)[tid].m_edx = 4096; - } - - KERNEL_DO_SYSCALL(tid,res); - VG_(check_known_blocking_syscall)(tid, syscall_no, &res /* POST */); - - /* Reschedule. 
*/ - VG_(threads)[tid].status = VgTs_Runnable; - /* Mark slot as no longer in use. */ - vg_waiting_fds[i].fd = -1; - /* pp_sched_status(); */ - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf,"resume due to I/O completion on fd %d", fd); - print_sched_event(tid, msg_buf); - } - } -} - - -static -void check_for_pthread_cond_timedwait ( void ) -{ - Int i, now; - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status != VgTs_WaitCV) - continue; - if (VG_(threads)[i].awaken_at == 0xFFFFFFFF /* no timeout */) - continue; - now = VG_(read_millisecond_timer)(); - if (now >= VG_(threads)[i].awaken_at) { - do_pthread_cond_timedwait_TIMEOUT(i); - } - } -} - - -static -void nanosleep_for_a_while ( void ) -{ - Int res; - struct vki_timespec req; - struct vki_timespec rem; - req.tv_sec = 0; - req.tv_nsec = 20 * 1000 * 1000; - res = VG_(nanosleep)( &req, &rem ); - vg_assert(res == 0 /* ok */ || res == 1 /* interrupted by signal */); -} - - -/* --------------------------------------------------------------------- - The scheduler proper. - ------------------------------------------------------------------ */ - -/* Run user-space threads until either - * Deadlock occurs - * One thread asks to shutdown Valgrind - * The specified number of basic blocks has gone by. -*/ -VgSchedReturnCode VG_(scheduler) ( void ) -{ - ThreadId tid, tid_next; - UInt trc; - UInt dispatch_ctr_SAVED; - Int done_this_time, n_in_bounded_wait; - Addr trans_addr; - Bool sigs_delivered; - - /* For the LRU structures, records when the epoch began. */ - ULong lru_epoch_started_at = 0; - - /* Start with the root thread. tid in general indicates the - currently runnable/just-finished-running thread. */ - VG_(last_run_tid) = tid = 1; - - /* This is the top level scheduler loop. It falls into three - phases. */ - while (True) { - - /* ======================= Phase 0 of 3 ======================= - Be paranoid. Always a good idea. 
*/ - stage1: - scheduler_sanity(); - VG_(do_sanity_checks)( False ); - - /* ======================= Phase 1 of 3 ======================= - Handle I/O completions and signals. This may change the - status of various threads. Then select a new thread to run, - or declare deadlock, or sleep if there are no runnable - threads but some are blocked on I/O. */ - - /* Age the LRU structures if an epoch has been completed. */ - if (VG_(bbs_done) - lru_epoch_started_at >= VG_BBS_PER_EPOCH) { - lru_epoch_started_at = VG_(bbs_done); - increment_epoch(); - } - - /* Was a debug-stop requested? */ - if (VG_(bbs_to_go) == 0) - goto debug_stop; - - /* Do the following loop until a runnable thread is found, or - deadlock is detected. */ - while (True) { - - /* For stats purposes only. */ - VG_(num_scheduling_events_MAJOR) ++; - - /* See if any I/O operations which we were waiting for have - completed, and, if so, make runnable the relevant waiting - threads. */ - poll_for_ready_fds(); - complete_blocked_syscalls(); - check_for_pthread_cond_timedwait(); - - /* See if there are any signals which need to be delivered. If - so, choose thread(s) to deliver them to, and build signal - delivery frames on those thread(s) stacks. */ - - /* Be careful about delivering signals to a thread waiting - for a mutex. In particular, when the handler is running, - that thread is temporarily apparently-not-waiting for the - mutex, so if it is unlocked by another thread whilst the - handler is running, this thread is not informed. When the - handler returns, the thread resumes waiting on the mutex, - even if, as a result, it has missed the unlocking of it. - Potential deadlock. This sounds all very strange, but the - POSIX standard appears to require this behaviour. */ - sigs_delivered = VG_(deliver_signals)(); - if (sigs_delivered) - VG_(do_sanity_checks)( False ); - - /* Try and find a thread (tid) to run. 
*/ - tid_next = tid; - n_in_bounded_wait = 0; - while (True) { - tid_next++; - if (tid_next >= VG_N_THREADS) tid_next = 1; - if (VG_(threads)[tid_next].status == VgTs_WaitFD - || VG_(threads)[tid_next].status == VgTs_Sleeping - || VG_(threads)[tid_next].status == VgTs_WaitSIG - || (VG_(threads)[tid_next].status == VgTs_WaitCV - && VG_(threads)[tid_next].awaken_at != 0xFFFFFFFF)) - n_in_bounded_wait ++; - if (VG_(threads)[tid_next].status == VgTs_Runnable) - break; /* We can run this one. */ - if (tid_next == tid) - break; /* been all the way round */ - } - tid = tid_next; - - if (VG_(threads)[tid].status == VgTs_Runnable) { - /* Found a suitable candidate. Fall out of this loop, so - we can advance to stage 2 of the scheduler: actually - running the thread. */ - break; - } - - /* We didn't find a runnable thread. Now what? */ - if (n_in_bounded_wait == 0) { - /* No runnable threads and no prospect of any appearing - even if we wait for an arbitrary length of time. In - short, we have a deadlock. */ - VG_(pp_sched_status)(); - return VgSrc_Deadlock; - } - - /* At least one thread is in a fd-wait state. Delay for a - while, and go round again, in the hope that eventually a - thread becomes runnable. */ - nanosleep_for_a_while(); - /* pp_sched_status(); */ - /* VG_(printf)("."); */ - } - - - /* ======================= Phase 2 of 3 ======================= - Wahey! We've finally decided that thread tid is runnable, so - we now do that. Run it for as much of a quanta as possible. - Trivial requests are handled and the thread continues. The - aim is not to do too many of Phase 1 since it is expensive. */ - - if (0) - VG_(printf)("SCHED: tid %d\n", tid); - - /* Figure out how many bbs to ask vg_run_innerloop to do. Note - that it decrements the counter before testing it for zero, so - that if VG_(dispatch_ctr) is set to N you get at most N-1 - iterations. Also this means that VG_(dispatch_ctr) must - exceed zero before entering the innerloop. 
Also also, the - decrement is done before the bb is actually run, so you - always get at least one decrement even if nothing happens. - */ - if (VG_(bbs_to_go) >= VG_SCHEDULING_QUANTUM) - VG_(dispatch_ctr) = VG_SCHEDULING_QUANTUM + 1; - else - VG_(dispatch_ctr) = (UInt)VG_(bbs_to_go) + 1; - - /* ... and remember what we asked for. */ - dispatch_ctr_SAVED = VG_(dispatch_ctr); - - /* paranoia ... */ - vg_assert(VG_(threads)[tid].tid == tid); - - /* Actually run thread tid. */ - while (True) { - - VG_(last_run_tid) = tid; - - /* For stats purposes only. */ - VG_(num_scheduling_events_MINOR) ++; - - if (0) - VG_(message)(Vg_DebugMsg, "thread %d: running for %d bbs", - tid, VG_(dispatch_ctr) - 1 ); -# if 0 - if (VG_(bbs_done) > 31700000 + 0) { - dispatch_ctr_SAVED = VG_(dispatch_ctr) = 2; - VG_(translate)(&VG_(threads)[tid], VG_(threads)[tid].m_eip, - NULL,NULL,NULL); - } - vg_assert(VG_(threads)[tid].m_eip != 0); -# endif - - trc = run_thread_for_a_while ( tid ); - -# if 0 - if (0 == VG_(threads)[tid].m_eip) { - VG_(printf)("tid = %d, dc = %llu\n", tid, VG_(bbs_done)); - vg_assert(0 != VG_(threads)[tid].m_eip); - } -# endif - - /* Deal quickly with trivial scheduling events, and resume the - thread. */ - - if (trc == VG_TRC_INNER_FASTMISS) { - vg_assert(VG_(dispatch_ctr) > 0); - - /* Trivial event. Miss in the fast-cache. Do a full - lookup for it. */ - trans_addr - = VG_(search_transtab) ( VG_(threads)[tid].m_eip ); - if (trans_addr == (Addr)0) { - /* Not found; we need to request a translation. */ - create_translation_for( tid, VG_(threads)[tid].m_eip ); - trans_addr = VG_(search_transtab) ( VG_(threads)[tid].m_eip ); - if (trans_addr == (Addr)0) - VG_(panic)("VG_TRC_INNER_FASTMISS: missing tt_fast entry"); - } - continue; /* with this thread */ - } - - if (trc == VG_TRC_EBP_JMP_CLIENTREQ) { - UInt reqno = *(UInt*)(VG_(threads)[tid].m_eax); - /* VG_(printf)("request 0x%x\n", reqno); */ - - /* Are we really absolutely totally quitting? 
*/ - if (reqno == VG_USERREQ__LIBC_FREERES_DONE) { - if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) { - VG_(message)(Vg_DebugMsg, - "__libc_freeres() done; really quitting!"); - } - return VgSrc_ExitSyscall; - } - - do_client_request(tid); - /* Following the request, we try and continue with the - same thread if still runnable. If not, go back to - Stage 1 to select a new thread to run. */ - if (VG_(threads)[tid].status == VgTs_Runnable - && reqno != VG_USERREQ__PTHREAD_YIELD) - continue; /* with this thread */ - else - goto stage1; - } - - if (trc == VG_TRC_EBP_JMP_SYSCALL) { - /* Do a syscall for the vthread tid. This could cause it - to become non-runnable. One special case: spot the - client doing calls to exit() and take this as the cue - to exit. */ -# if 0 - { UInt* esp; Int i; - esp=(UInt*)VG_(threads)[tid].m_esp; - VG_(printf)("\nBEFORE\n"); - for (i = 10; i >= -10; i--) - VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]); - } -# endif - - /* Deal with calling __libc_freeres() at exit. When the - client does __NR_exit, it's exiting for good. So we - then run VG_(__libc_freeres_wrapper). That quits by - doing VG_USERREQ__LIBC_FREERES_DONE, and at that point - we really exit. To be safe we nuke all other threads - currently running. - - If not valgrinding (cachegrinding, etc) don't do this. - __libc_freeres does some invalid frees which crash - the unprotected malloc/free system. 
*/ - if (VG_(threads)[tid].m_eax == __NR_exit - && !VG_(clo_instrument)) { - if (VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) { - VG_(message)(Vg_DebugMsg, - "Caught __NR_exit; quitting"); - } - return VgSrc_ExitSyscall; - } - - if (VG_(threads)[tid].m_eax == __NR_exit) { - vg_assert(VG_(clo_instrument)); - if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched)) { - VG_(message)(Vg_DebugMsg, - "Caught __NR_exit; running __libc_freeres()"); - } - VG_(nuke_all_threads_except) ( tid ); - VG_(threads)[tid].m_eip = (UInt)(&VG_(__libc_freeres_wrapper)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - goto stage1; /* party on, dudes (but not for much longer :) */ - } - - /* Trap syscalls to __NR_sched_yield and just have this - thread yield instead. Not essential, just an - optimisation. */ - if (VG_(threads)[tid].m_eax == __NR_sched_yield) { - SET_EAX(tid, 0); /* syscall returns with success */ - goto stage1; /* find a new thread to run */ - } - - sched_do_syscall(tid); - -# if 0 - { UInt* esp; Int i; - esp=(UInt*)VG_(threads)[tid].m_esp; - VG_(printf)("AFTER\n"); - for (i = 10; i >= -10; i--) - VG_(printf)("%2d %p = 0x%x\n", i, &esp[i], esp[i]); - } -# endif - - if (VG_(threads)[tid].status == VgTs_Runnable) { - /* Better do a signal check, since if in a tight loop - with a slow syscall it may be a very long time - before we get back to the main signal check in Stage 1. */ - sigs_delivered = VG_(deliver_signals)(); - if (sigs_delivered) - VG_(do_sanity_checks)( False ); - continue; /* with this thread */ - } else { - goto stage1; - } - } - - /* It's an event we can't quickly deal with. Give up running - this thread and handle things the expensive way. */ - break; - } - - /* ======================= Phase 3 of 3 ======================= - Handle non-trivial thread requests, mostly pthread stuff. */ - - /* Ok, we've fallen out of the dispatcher for a - non-completely-trivial reason. First, update basic-block - counters. 
*/ - - done_this_time = (Int)dispatch_ctr_SAVED - (Int)VG_(dispatch_ctr) - 1; - vg_assert(done_this_time >= 0); - VG_(bbs_to_go) -= (ULong)done_this_time; - VG_(bbs_done) += (ULong)done_this_time; - - if (0 && trc != VG_TRC_INNER_FASTMISS) - VG_(message)(Vg_DebugMsg, "thread %d: completed %d bbs, trc %d", - tid, done_this_time, (Int)trc ); - - if (0 && trc != VG_TRC_INNER_FASTMISS) - VG_(message)(Vg_DebugMsg, "thread %d: %ld bbs, event %s", - tid, VG_(bbs_done), - name_of_sched_event(trc) ); - - /* Examine the thread's return code to figure out why it - stopped. */ - - switch (trc) { - - case VG_TRC_INNER_COUNTERZERO: - /* Timeslice is out. Let a new thread be scheduled, - simply by doing nothing, causing us to arrive back at - Phase 1. */ - if (VG_(bbs_to_go) == 0) { - goto debug_stop; - } - vg_assert(VG_(dispatch_ctr) == 0); - break; - - case VG_TRC_UNRESUMABLE_SIGNAL: - /* It got a SIGSEGV/SIGBUS, which we need to deliver right - away. Again, do nothing, so we wind up back at Phase - 1, whereupon the signal will be "delivered". */ - break; - - default: - VG_(printf)("\ntrc = %d\n", trc); - VG_(panic)("VG_(scheduler), phase 3: " - "unexpected thread return code"); - /* NOTREACHED */ - break; - - } /* switch (trc) */ - - /* That completes Phase 3 of 3. Return now to the top of the - main scheduler loop, to Phase 1 of 3. */ - - } /* top-level scheduler loop */ - - - /* NOTREACHED */ - VG_(panic)("scheduler: post-main-loop ?!"); - /* NOTREACHED */ - - debug_stop: - /* If we exited because of a debug stop, print the translation - of the last block executed -- by translating it again, and - throwing away the result. 
*/ - VG_(printf)( - "======vvvvvvvv====== LAST TRANSLATION ======vvvvvvvv======\n"); - VG_(translate)( &VG_(threads)[tid], - VG_(threads)[tid].m_eip, NULL, NULL, NULL ); - VG_(printf)("\n"); - VG_(printf)( - "======^^^^^^^^====== LAST TRANSLATION ======^^^^^^^^======\n"); - - return VgSrc_BbsDone; -} - - -/* --------------------------------------------------------------------- - The pthread implementation. - ------------------------------------------------------------------ */ - -#include -#include - -#define VG_PTHREAD_STACK_MIN \ - (VG_PTHREAD_STACK_SIZE - VG_AR_CLIENT_STACKBASE_REDZONE_SZB) - -/* /usr/include/bits/pthreadtypes.h: - typedef unsigned long int pthread_t; -*/ - - -/* ----------------------------------------------------------- - Thread CREATION, JOINAGE and CANCELLATION: HELPER FNS - -------------------------------------------------------- */ - -/* We've decided to action a cancellation on tid. Make it jump to - thread_exit_wrapper() in vg_libpthread.c, passing PTHREAD_CANCELED - as the arg. */ -static -void make_thread_jump_to_cancelhdlr ( ThreadId tid ) -{ - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - /* Push PTHREAD_CANCELED on the stack and jump to the cancellation - handler -- which is really thread_exit_wrapper() in - vg_libpthread.c. */ - vg_assert(VG_(threads)[tid].cancel_pend != NULL); - VG_(threads)[tid].m_esp -= 4; - * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)PTHREAD_CANCELED; - VG_(threads)[tid].m_eip = (UInt)VG_(threads)[tid].cancel_pend; - VG_(threads)[tid].status = VgTs_Runnable; - /* Make sure we aren't cancelled again whilst handling this - cancellation. */ - VG_(threads)[tid].cancel_st = False; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "jump to cancellation handler (hdlr = %p)", - VG_(threads)[tid].cancel_pend); - print_sched_event(tid, msg_buf); - } -} - - - -/* Release resources and generally clean up once a thread has finally - disappeared. 
*/ -static -void cleanup_after_thread_exited ( ThreadId tid ) -{ - Int i; - vki_ksigset_t irrelevant_sigmask; - vg_assert(VG_(is_valid_or_empty_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Empty); - /* Mark its stack no-access */ - if (VG_(clo_instrument) && tid != 1) - VGM_(make_noaccess)( VG_(threads)[tid].stack_base, - VG_(threads)[tid].stack_size ); - /* Forget about any pending signals directed specifically at this - thread, and get rid of signal handlers specifically arranged for - this thread. */ - VG_(block_all_host_signals)( &irrelevant_sigmask ); - VG_(handle_SCSS_change)( False /* lazy update */ ); - - /* Clean up the waiting_fd table */ - for (i = 0; i < VG_N_WAITING_FDS; i++) { - if (vg_waiting_fds[i].tid == tid) { - vg_waiting_fds[i].fd = -1; /* not in use */ - } - } -} - - -/* Look for matching pairs of threads waiting for joiners and threads - waiting for joinees. For each such pair copy the return value of - the joinee into the joiner, let the joiner resume and discard the - joinee. */ -static -void maybe_rendezvous_joiners_and_joinees ( void ) -{ - Char msg_buf[100]; - void** thread_return; - ThreadId jnr, jee; - - for (jnr = 1; jnr < VG_N_THREADS; jnr++) { - if (VG_(threads)[jnr].status != VgTs_WaitJoinee) - continue; - jee = VG_(threads)[jnr].joiner_jee_tid; - if (jee == VG_INVALID_THREADID) - continue; - vg_assert(VG_(is_valid_tid)(jee)); - if (VG_(threads)[jee].status != VgTs_WaitJoiner) - continue; - /* ok! jnr is waiting to join with jee, and jee is waiting to be - joined by ... well, any thread. So let's do it! */ - - /* Copy return value to where joiner wants it. 
*/ - thread_return = VG_(threads)[jnr].joiner_thread_return; - if (thread_return != NULL) { - /* CHECK thread_return writable */ - if (VG_(clo_instrument) - && !VGM_(check_writable)( (Addr)thread_return, - sizeof(void*), NULL)) - VG_(record_pthread_err)( jnr, - "pthread_join: thread_return points to invalid location"); - - *thread_return = VG_(threads)[jee].joinee_retval; - /* Not really right, since it makes the thread's return value - appear to be defined even if it isn't. */ - if (VG_(clo_instrument)) - VGM_(make_readable)( (Addr)thread_return, sizeof(void*) ); - } - - /* Joinee is discarded */ - VG_(threads)[jee].status = VgTs_Empty; /* bye! */ - cleanup_after_thread_exited ( jee ); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "rendezvous with joinee %d. %d resumes, %d exits.", - jee, jnr, jee ); - print_sched_event(jnr, msg_buf); - } - - /* joiner returns with success */ - VG_(threads)[jnr].status = VgTs_Runnable; - SET_EDX(jnr, 0); - } -} - - -/* Nuke all threads other than tid. POSIX specifies that this should - happen in __NR_exec, and after a __NR_fork() when I am the child, - as POSIX requires. 
*/ -void VG_(nuke_all_threads_except) ( ThreadId me ) -{ - ThreadId tid; - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (tid == me - || VG_(threads)[tid].status == VgTs_Empty) - continue; - if (0) - VG_(printf)( - "VG_(nuke_all_threads_except): nuking tid %d\n", tid); - VG_(threads)[tid].status = VgTs_Empty; - cleanup_after_thread_exited( tid ); - } -} - - -/* ----------------------------------------------------------- - Thread CREATION, JOINAGE and CANCELLATION: REQUESTS - -------------------------------------------------------- */ - -static -void do__cleanup_push ( ThreadId tid, CleanupEntry* cu ) -{ - Int sp; - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - sp = VG_(threads)[tid].custack_used; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "cleanup_push (fn %p, arg %p) -> slot %d", - cu->fn, cu->arg, sp); - print_sched_event(tid, msg_buf); - } - vg_assert(sp >= 0 && sp <= VG_N_CLEANUPSTACK); - if (sp == VG_N_CLEANUPSTACK) - VG_(panic)("do__cleanup_push: VG_N_CLEANUPSTACK is too small." 
- " Increase and recompile."); - VG_(threads)[tid].custack[sp] = *cu; - sp++; - VG_(threads)[tid].custack_used = sp; - SET_EDX(tid, 0); -} - - -static -void do__cleanup_pop ( ThreadId tid, CleanupEntry* cu ) -{ - Int sp; - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - sp = VG_(threads)[tid].custack_used; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "cleanup_pop from slot %d", sp-1); - print_sched_event(tid, msg_buf); - } - vg_assert(sp >= 0 && sp <= VG_N_CLEANUPSTACK); - if (sp == 0) { - SET_EDX(tid, -1); - return; - } - sp--; - *cu = VG_(threads)[tid].custack[sp]; - if (VG_(clo_instrument)) - VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) ); - VG_(threads)[tid].custack_used = sp; - SET_EDX(tid, 0); -} - - -static -void do_pthread_yield ( ThreadId tid ) -{ - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "yield"); - print_sched_event(tid, msg_buf); - } - SET_EDX(tid, 0); -} - - -static -void do__testcancel ( ThreadId tid ) -{ - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "testcancel"); - print_sched_event(tid, msg_buf); - } - if (/* is there a cancellation pending on this thread? */ - VG_(threads)[tid].cancel_pend != NULL - && /* is this thread accepting cancellations? */ - VG_(threads)[tid].cancel_st) { - /* Ok, let's do the cancellation. */ - make_thread_jump_to_cancelhdlr ( tid ); - } else { - /* No, we keep going. */ - SET_EDX(tid, 0); - } -} - - -static -void do__set_cancelstate ( ThreadId tid, Int state ) -{ - Bool old_st; - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "set_cancelstate to %d (%s)", state, - state==PTHREAD_CANCEL_ENABLE - ? "ENABLE" - : (state==PTHREAD_CANCEL_DISABLE ? 
"DISABLE" : "???")); - print_sched_event(tid, msg_buf); - } - old_st = VG_(threads)[tid].cancel_st; - if (state == PTHREAD_CANCEL_ENABLE) { - VG_(threads)[tid].cancel_st = True; - } else - if (state == PTHREAD_CANCEL_DISABLE) { - VG_(threads)[tid].cancel_st = False; - } else { - VG_(panic)("do__set_cancelstate"); - } - SET_EDX(tid, old_st ? PTHREAD_CANCEL_ENABLE - : PTHREAD_CANCEL_DISABLE); -} - - -static -void do__set_canceltype ( ThreadId tid, Int type ) -{ - Bool old_ty; - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "set_canceltype to %d (%s)", type, - type==PTHREAD_CANCEL_ASYNCHRONOUS - ? "ASYNCHRONOUS" - : (type==PTHREAD_CANCEL_DEFERRED ? "DEFERRED" : "???")); - print_sched_event(tid, msg_buf); - } - old_ty = VG_(threads)[tid].cancel_ty; - if (type == PTHREAD_CANCEL_ASYNCHRONOUS) { - VG_(threads)[tid].cancel_ty = False; - } else - if (type == PTHREAD_CANCEL_DEFERRED) { - VG_(threads)[tid].cancel_ty = True; - } else { - VG_(panic)("do__set_canceltype"); - } - SET_EDX(tid, old_ty ? PTHREAD_CANCEL_DEFERRED - : PTHREAD_CANCEL_ASYNCHRONOUS); -} - - -/* Set or get the detach state for thread det. */ -static -void do__set_or_get_detach ( ThreadId tid, - Int what, ThreadId det ) -{ - ThreadId i; - Char msg_buf[100]; - /* VG_(printf)("do__set_or_get_detach tid %d what %d det %d\n", - tid, what, det); */ - vg_assert(VG_(is_valid_tid)(tid)); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "set_or_get_detach %d (%s) for tid %d", what, - what==0 ? "not-detached" : ( - what==1 ? "detached" : ( - what==2 ? "fetch old value" : "???")), - det ); - print_sched_event(tid, msg_buf); - } - - if (!VG_(is_valid_tid)(det)) { - SET_EDX(tid, -1); - return; - } - - switch (what) { - case 2: /* get */ - SET_EDX(tid, VG_(threads)[det].detached ? 1 : 0); - return; - case 1: /* set detached. If someone is in a join-wait for det, - do not detach. 
*/ - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status == VgTs_WaitJoinee - && VG_(threads)[i].joiner_jee_tid == det) { - SET_EDX(tid, 0); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "tid %d not detached because %d in join-wait for it %d", - det, i); - print_sched_event(tid, msg_buf); - } - return; - } - } - VG_(threads)[det].detached = True; - SET_EDX(tid, 0); - return; - case 0: /* set not detached */ - VG_(threads)[det].detached = False; - SET_EDX(tid, 0); - return; - default: - VG_(panic)("do__set_or_get_detach"); - } -} - - -static -void do__set_cancelpend ( ThreadId tid, - ThreadId cee, - void (*cancelpend_hdlr)(void*) ) -{ - Char msg_buf[100]; - - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - - if (!VG_(is_valid_tid)(cee)) { - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "set_cancelpend for invalid tid %d", cee); - print_sched_event(tid, msg_buf); - } - VG_(record_pthread_err)( tid, - "pthread_cancel: target thread does not exist, or invalid"); - SET_EDX(tid, -VKI_ESRCH); - return; - } - - VG_(threads)[cee].cancel_pend = cancelpend_hdlr; - - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "set_cancelpend (hdlr = %p, set by tid %d)", - cancelpend_hdlr, tid); - print_sched_event(cee, msg_buf); - } - - /* Thread doing the cancelling returns with success. */ - SET_EDX(tid, 0); - - /* Perhaps we can nuke the cancellee right now? */ - do__testcancel(cee); -} - - -static -void do_pthread_join ( ThreadId tid, - ThreadId jee, void** thread_return ) -{ - Char msg_buf[100]; - ThreadId i; - /* jee, the joinee, is the thread specified as an arg in thread - tid's call to pthread_join. So tid is the join-er. 
*/ - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - - if (jee == tid) { - VG_(record_pthread_err)( tid, - "pthread_join: attempt to join to self"); - SET_EDX(tid, EDEADLK); /* libc constant, not a kernel one */ - VG_(threads)[tid].status = VgTs_Runnable; - return; - } - - /* Flush any completed pairs, so as to make sure what we're looking - at is up-to-date. */ - maybe_rendezvous_joiners_and_joinees(); - - /* Is this a sane request? */ - if (jee < 0 - || jee >= VG_N_THREADS - || VG_(threads)[jee].status == VgTs_Empty) { - /* Invalid thread to join to. */ - VG_(record_pthread_err)( tid, - "pthread_join: target thread does not exist, or invalid"); - SET_EDX(tid, EINVAL); - VG_(threads)[tid].status = VgTs_Runnable; - return; - } - - /* Is anyone else already in a join-wait for jee? */ - for (i = 1; i < VG_N_THREADS; i++) { - if (i == tid) continue; - if (VG_(threads)[i].status == VgTs_WaitJoinee - && VG_(threads)[i].joiner_jee_tid == jee) { - /* Someone already did join on this thread */ - VG_(record_pthread_err)( tid, - "pthread_join: another thread already " - "in join-wait for target thread"); - SET_EDX(tid, EINVAL); - VG_(threads)[tid].status = VgTs_Runnable; - return; - } - } - - /* Mark this thread as waiting for the joinee. */ - VG_(threads)[tid].status = VgTs_WaitJoinee; - VG_(threads)[tid].joiner_thread_return = thread_return; - VG_(threads)[tid].joiner_jee_tid = jee; - - /* Look for matching joiners and joinees and do the right thing. */ - maybe_rendezvous_joiners_and_joinees(); - - /* Return value is irrelevant since this this thread becomes - non-runnable. maybe_resume_joiner() will cause it to return the - right value when it resumes. */ - - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "wait for joinee %d (may already be ready)", jee); - print_sched_event(tid, msg_buf); - } -} - - -/* ( void* ): calling thread waits for joiner and returns the void* to - it. 
This is one of two ways in which a thread can finally exit -- - the other is do__quit. */ -static -void do__wait_joiner ( ThreadId tid, void* retval ) -{ - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "do__wait_joiner(retval = %p) (non-detached thread exit)", retval); - print_sched_event(tid, msg_buf); - } - VG_(threads)[tid].status = VgTs_WaitJoiner; - VG_(threads)[tid].joinee_retval = retval; - maybe_rendezvous_joiners_and_joinees(); -} - - -/* ( no-args ): calling thread disappears from the system forever. - Reclaim resources. */ -static -void do__quit ( ThreadId tid ) -{ - Char msg_buf[100]; - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(VG_(threads)[tid].status == VgTs_Runnable); - VG_(threads)[tid].status = VgTs_Empty; /* bye! */ - cleanup_after_thread_exited ( tid ); - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "do__quit (detached thread exit)"); - print_sched_event(tid, msg_buf); - } - /* Return value is irrelevant; this thread will not get - rescheduled. */ -} - - -/* Should never be entered. If it is, will be on the simulated - CPU. */ -static -void do__apply_in_new_thread_bogusRA ( void ) -{ - VG_(panic)("do__apply_in_new_thread_bogusRA"); -} - -/* (Fn, Arg): Create a new thread and run Fn applied to Arg in it. Fn - MUST NOT return -- ever. Eventually it will do either __QUIT or - __WAIT_JOINER. Return the child tid to the parent. */ -static -void do__apply_in_new_thread ( ThreadId parent_tid, - void* (*fn)(void *), - void* arg ) -{ - Addr new_stack; - UInt new_stk_szb; - ThreadId tid; - Char msg_buf[100]; - - /* Paranoia ... 
*/ - vg_assert(sizeof(pthread_t) == sizeof(UInt)); - - vg_assert(VG_(threads)[parent_tid].status != VgTs_Empty); - - tid = vg_alloc_ThreadState(); - - /* If we've created the main thread's tid, we're in deep trouble :) */ - vg_assert(tid != 1); - vg_assert(VG_(is_valid_or_empty_tid)(tid)); - - /* Copy the parent's CPU state into the child's, in a roundabout - way (via baseBlock). */ - VG_(load_thread_state)(parent_tid); - VG_(save_thread_state)(tid); - - /* Consider allocating the child a stack, if the one it already has - is inadequate. */ - new_stk_szb = VG_PTHREAD_STACK_MIN; - - if (new_stk_szb > VG_(threads)[tid].stack_size) { - /* Again, for good measure :) We definitely don't want to be - allocating a stack for the main thread. */ - vg_assert(tid != 1); - /* for now, we don't handle the case of anything other than - assigning it for the first time. */ - vg_assert(VG_(threads)[tid].stack_size == 0); - vg_assert(VG_(threads)[tid].stack_base == (Addr)NULL); - new_stack = (Addr)VG_(get_memory_from_mmap)( new_stk_szb, - "new thread stack" ); - VG_(threads)[tid].stack_base = new_stack; - VG_(threads)[tid].stack_size = new_stk_szb; - VG_(threads)[tid].stack_highest_word - = new_stack + new_stk_szb - - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; /* -4 ??? 
*/; - } - - VG_(threads)[tid].m_esp - = VG_(threads)[tid].stack_base - + VG_(threads)[tid].stack_size - - VG_AR_CLIENT_STACKBASE_REDZONE_SZB; - - if (VG_(clo_instrument)) - VGM_(make_noaccess)( VG_(threads)[tid].m_esp, - VG_AR_CLIENT_STACKBASE_REDZONE_SZB ); - - /* push arg */ - VG_(threads)[tid].m_esp -= 4; - * (UInt*)(VG_(threads)[tid].m_esp) = (UInt)arg; - - /* push (bogus) return address */ - VG_(threads)[tid].m_esp -= 4; - * (UInt*)(VG_(threads)[tid].m_esp) - = (UInt)&do__apply_in_new_thread_bogusRA; - - if (VG_(clo_instrument)) - VGM_(make_readable)( VG_(threads)[tid].m_esp, 2 * 4 ); - - /* this is where we start */ - VG_(threads)[tid].m_eip = (UInt)fn; - - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "new thread, created by %d", parent_tid ); - print_sched_event(tid, msg_buf); - } - - /* Create new thread with default attrs: - deferred cancellation, not detached - */ - mostly_clear_thread_record(tid); - VG_(threads)[tid].status = VgTs_Runnable; - - /* We inherit our parent's signal mask. 
*/ - VG_(threads)[tid].sig_mask = VG_(threads)[parent_tid].sig_mask; - VG_(ksigemptyset)(&VG_(threads)[tid].sigs_waited_for); - - /* return child's tid to parent */ - SET_EDX(parent_tid, tid); /* success */ -} - - -/* ----------------------------------------------------------- - MUTEXes - -------------------------------------------------------- */ - -/* pthread_mutex_t is a struct with at 5 words: - typedef struct - { - int __m_reserved; -- Reserved for future use - int __m_count; -- Depth of recursive locking - _pthread_descr __m_owner; -- Owner thread (if recursive or errcheck) - int __m_kind; -- Mutex kind: fast, recursive or errcheck - struct _pthread_fastlock __m_lock; -- Underlying fast lock - } pthread_mutex_t; - - #define PTHREAD_MUTEX_INITIALIZER \ - {0, 0, 0, PTHREAD_MUTEX_TIMED_NP, __LOCK_INITIALIZER} - # define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP \ - {0, 0, 0, PTHREAD_MUTEX_RECURSIVE_NP, __LOCK_INITIALIZER} - # define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP \ - {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, __LOCK_INITIALIZER} - # define PTHREAD_ADAPTIVE_MUTEX_INITIALIZER_NP \ - {0, 0, 0, PTHREAD_MUTEX_ADAPTIVE_NP, __LOCK_INITIALIZER} - - How we use it: - - __m_kind never changes and indicates whether or not it is recursive. - - __m_count indicates the lock count; if 0, the mutex is not owned by - anybody. - - __m_owner has a ThreadId value stuffed into it. We carefully arrange - that ThreadId == 0 is invalid (VG_INVALID_THREADID), so that - statically initialised mutexes correctly appear - to belong to nobody. - - In summary, a not-in-use mutex is distinguised by having __m_owner - == 0 (VG_INVALID_THREADID) and __m_count == 0 too. If one of those - conditions holds, the other should too. - - There is no linked list of threads waiting for this mutex. Instead - a thread in WaitMX state points at the mutex with its waited_on_mx - field. This makes _unlock() inefficient, but simple to implement the - right semantics viz-a-viz signals. 
- - We don't have to deal with mutex initialisation; the client side - deals with that for us. -*/ - -/* Helper fns ... */ -static -void release_one_thread_waiting_on_mutex ( pthread_mutex_t* mutex, - Char* caller ) -{ - Int i; - Char msg_buf[100]; - - /* Find some arbitrary thread waiting on this mutex, and make it - runnable. If none are waiting, mark the mutex as not held. */ - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status == VgTs_Empty) - continue; - if (VG_(threads)[i].status == VgTs_WaitMX - && VG_(threads)[i].associated_mx == mutex) - break; - } - - vg_assert(i <= VG_N_THREADS); - if (i == VG_N_THREADS) { - /* Nobody else is waiting on it. */ - mutex->__m_count = 0; - mutex->__m_owner = VG_INVALID_THREADID; - } else { - /* Notionally transfer the hold to thread i, whose - pthread_mutex_lock() call now returns with 0 (success). */ - /* The .count is already == 1. */ - vg_assert(VG_(threads)[i].associated_mx == mutex); - mutex->__m_owner = (_pthread_descr)i; - VG_(threads)[i].status = VgTs_Runnable; - VG_(threads)[i].associated_mx = NULL; - /* m_edx already holds pth_mx_lock() success (0) */ - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "%s mx %p: RESUME", - caller, mutex ); - print_pthread_event(i, msg_buf); - } - } -} - - -static -void do_pthread_mutex_lock( ThreadId tid, - Bool is_trylock, - pthread_mutex_t* mutex ) -{ - Char msg_buf[100]; - Char* caller - = is_trylock ? "pthread_mutex_trylock" - : "pthread_mutex_lock "; - - if (VG_(clo_trace_pthread_level) >= 2) { - VG_(sprintf)(msg_buf, "%s mx %p ...", caller, mutex ); - print_pthread_event(tid, msg_buf); - } - - /* Paranoia ... */ - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - /* POSIX doesn't mandate this, but for sanity ... */ - if (mutex == NULL) { - VG_(record_pthread_err)( tid, - "pthread_mutex_lock/trylock: mutex is NULL"); - SET_EDX(tid, EINVAL); - return; - } - - /* More paranoia ... 
*/ - switch (mutex->__m_kind) { -# ifndef GLIBC_2_1 - case PTHREAD_MUTEX_TIMED_NP: - case PTHREAD_MUTEX_ADAPTIVE_NP: -# endif -# ifdef GLIBC_2_1 - case PTHREAD_MUTEX_FAST_NP: -# endif - case PTHREAD_MUTEX_RECURSIVE_NP: - case PTHREAD_MUTEX_ERRORCHECK_NP: - if (mutex->__m_count >= 0) break; - /* else fall thru */ - default: - VG_(record_pthread_err)( tid, - "pthread_mutex_lock/trylock: mutex is invalid"); - SET_EDX(tid, EINVAL); - return; - } - - if (mutex->__m_count > 0) { - - vg_assert(VG_(is_valid_tid)((ThreadId)mutex->__m_owner)); - - /* Someone has it already. */ - if ((ThreadId)mutex->__m_owner == tid) { - /* It's locked -- by me! */ - if (mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP) { - /* return 0 (success). */ - mutex->__m_count++; - SET_EDX(tid, 0); - if (0) - VG_(printf)("!!!!!! tid %d, mx %p -> locked %d\n", - tid, mutex, mutex->__m_count); - return; - } else { - if (is_trylock) - SET_EDX(tid, EBUSY); - else - SET_EDX(tid, EDEADLK); - return; - } - } else { - /* Someone else has it; we have to wait. Mark ourselves - thusly. */ - /* GUARD: __m_count > 0 && __m_owner is valid */ - if (is_trylock) { - /* caller is polling; so return immediately. */ - SET_EDX(tid, EBUSY); - } else { - VG_(threads)[tid].status = VgTs_WaitMX; - VG_(threads)[tid].associated_mx = mutex; - SET_EDX(tid, 0); /* pth_mx_lock success value */ - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "%s mx %p: BLOCK", - caller, mutex ); - print_pthread_event(tid, msg_buf); - } - } - return; - } - - } else { - /* Nobody owns it. Sanity check ... */ - vg_assert(mutex->__m_owner == VG_INVALID_THREADID); - /* We get it! [for the first time]. */ - mutex->__m_count = 1; - mutex->__m_owner = (_pthread_descr)tid; - /* return 0 (success). 
*/ - SET_EDX(tid, 0); - } - -} - - -static -void do_pthread_mutex_unlock ( ThreadId tid, - pthread_mutex_t* mutex ) -{ - Char msg_buf[100]; - - if (VG_(clo_trace_pthread_level) >= 2) { - VG_(sprintf)(msg_buf, "pthread_mutex_unlock mx %p ...", mutex ); - print_pthread_event(tid, msg_buf); - } - - /* Paranoia ... */ - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (mutex == NULL) { - VG_(record_pthread_err)( tid, - "pthread_mutex_unlock: mutex is NULL"); - SET_EDX(tid, EINVAL); - return; - } - - /* More paranoia ... */ - switch (mutex->__m_kind) { -# ifndef GLIBC_2_1 - case PTHREAD_MUTEX_TIMED_NP: - case PTHREAD_MUTEX_ADAPTIVE_NP: -# endif -# ifdef GLIBC_2_1 - case PTHREAD_MUTEX_FAST_NP: -# endif - case PTHREAD_MUTEX_RECURSIVE_NP: - case PTHREAD_MUTEX_ERRORCHECK_NP: - if (mutex->__m_count >= 0) break; - /* else fall thru */ - default: - VG_(record_pthread_err)( tid, - "pthread_mutex_unlock: mutex is invalid"); - SET_EDX(tid, EINVAL); - return; - } - - /* Barf if we don't currently hold the mutex. */ - if (mutex->__m_count == 0) { - /* nobody holds it */ - VG_(record_pthread_err)( tid, - "pthread_mutex_unlock: mutex is not locked"); - SET_EDX(tid, EPERM); - return; - } - - if ((ThreadId)mutex->__m_owner != tid) { - /* we don't hold it */ - VG_(record_pthread_err)( tid, - "pthread_mutex_unlock: mutex is locked by a different thread"); - SET_EDX(tid, EPERM); - return; - } - - /* If it's a multiply-locked recursive mutex, just decrement the - lock count and return. */ - if (mutex->__m_count > 1) { - vg_assert(mutex->__m_kind == PTHREAD_MUTEX_RECURSIVE_NP); - mutex->__m_count --; - SET_EDX(tid, 0); /* success */ - return; - } - - /* Now we're sure it is locked exactly once, and by the thread who - is now doing an unlock on it. */ - vg_assert(mutex->__m_count == 1); - vg_assert((ThreadId)mutex->__m_owner == tid); - - /* Release at max one thread waiting on this mutex. 
*/ - release_one_thread_waiting_on_mutex ( mutex, "pthread_mutex_lock" ); - - /* Our (tid's) pth_unlock() returns with 0 (success). */ - SET_EDX(tid, 0); /* Success. */ -} - - -/* ----------------------------------------------------------- - CONDITION VARIABLES - -------------------------------------------------------- */ - -/* The relevant native types are as follows: - (copied from /usr/include/bits/pthreadtypes.h) - - -- Conditions (not abstract because of PTHREAD_COND_INITIALIZER - typedef struct - { - struct _pthread_fastlock __c_lock; -- Protect against concurrent access - _pthread_descr __c_waiting; -- Threads waiting on this condition - } pthread_cond_t; - - -- Attribute for conditionally variables. - typedef struct - { - int __dummy; - } pthread_condattr_t; - - #define PTHREAD_COND_INITIALIZER {__LOCK_INITIALIZER, 0} - - We don't use any fields of pthread_cond_t for anything at all. - Only the identity of the CVs is important. - - Linux pthreads supports no attributes on condition variables, so we - don't need to think too hard there. */ - - -static -void do_pthread_cond_timedwait_TIMEOUT ( ThreadId tid ) -{ - Char msg_buf[100]; - pthread_mutex_t* mx; - pthread_cond_t* cv; - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_WaitCV - && VG_(threads)[tid].awaken_at != 0xFFFFFFFF); - mx = VG_(threads)[tid].associated_mx; - vg_assert(mx != NULL); - cv = VG_(threads)[tid].associated_cv; - vg_assert(cv != NULL); - - if (mx->__m_owner == VG_INVALID_THREADID) { - /* Currently unheld; hand it out to thread tid. 
*/ - vg_assert(mx->__m_count == 0); - VG_(threads)[tid].status = VgTs_Runnable; - SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */ - VG_(threads)[tid].associated_cv = NULL; - VG_(threads)[tid].associated_mx = NULL; - mx->__m_owner = (_pthread_descr)tid; - mx->__m_count = 1; - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "pthread_cond_timedwai cv %p: TIMEOUT with mx %p", - cv, mx ); - print_pthread_event(tid, msg_buf); - } - } else { - /* Currently held. Make thread tid be blocked on it. */ - vg_assert(mx->__m_count > 0); - VG_(threads)[tid].status = VgTs_WaitMX; - SET_EDX(tid, ETIMEDOUT); /* pthread_cond_wait return value */ - VG_(threads)[tid].associated_cv = NULL; - VG_(threads)[tid].associated_mx = mx; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "pthread_cond_timedwai cv %p: TIMEOUT -> BLOCK for mx %p", - cv, mx ); - print_pthread_event(tid, msg_buf); - } - - } -} - - -static -void release_N_threads_waiting_on_cond ( pthread_cond_t* cond, - Int n_to_release, - Char* caller ) -{ - Int i; - Char msg_buf[100]; - pthread_mutex_t* mx; - - while (True) { - if (n_to_release == 0) - return; - - /* Find a thread waiting on this CV. */ - for (i = 1; i < VG_N_THREADS; i++) { - if (VG_(threads)[i].status == VgTs_Empty) - continue; - if (VG_(threads)[i].status == VgTs_WaitCV - && VG_(threads)[i].associated_cv == cond) - break; - } - vg_assert(i <= VG_N_THREADS); - - if (i == VG_N_THREADS) { - /* Nobody else is waiting on it. */ - return; - } - - mx = VG_(threads)[i].associated_mx; - vg_assert(mx != NULL); - - if (mx->__m_owner == VG_INVALID_THREADID) { - /* Currently unheld; hand it out to thread i. 
*/ - vg_assert(mx->__m_count == 0); - VG_(threads)[i].status = VgTs_Runnable; - VG_(threads)[i].associated_cv = NULL; - VG_(threads)[i].associated_mx = NULL; - mx->__m_owner = (_pthread_descr)i; - mx->__m_count = 1; - /* .m_edx already holds pth_cond_wait success value (0) */ - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "%s cv %p: RESUME with mx %p", - caller, cond, mx ); - print_pthread_event(i, msg_buf); - } - - } else { - /* Currently held. Make thread i be blocked on it. */ - vg_assert(mx->__m_count > 0); - VG_(threads)[i].status = VgTs_WaitMX; - VG_(threads)[i].associated_cv = NULL; - VG_(threads)[i].associated_mx = mx; - SET_EDX(i, 0); /* pth_cond_wait success value */ - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "%s cv %p: BLOCK for mx %p", - caller, cond, mx ); - print_pthread_event(i, msg_buf); - } - - } - - n_to_release--; - } -} - - -static -void do_pthread_cond_wait ( ThreadId tid, - pthread_cond_t *cond, - pthread_mutex_t *mutex, - UInt ms_end ) -{ - Char msg_buf[100]; - - /* If ms_end == 0xFFFFFFFF, wait forever (no timeout). Otherwise, - ms_end is the ending millisecond. */ - - /* pre: mutex should be a valid mutex and owned by tid. */ - if (VG_(clo_trace_pthread_level) >= 2) { - VG_(sprintf)(msg_buf, "pthread_cond_wait cv %p, mx %p, end %d ...", - cond, mutex, ms_end ); - print_pthread_event(tid, msg_buf); - } - - /* Paranoia ... */ - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (mutex == NULL || cond == NULL) { - VG_(record_pthread_err)( tid, - "pthread_cond_wait/timedwait: cond or mutex is NULL"); - SET_EDX(tid, EINVAL); - return; - } - - /* More paranoia ... 
*/ - switch (mutex->__m_kind) { -# ifndef GLIBC_2_1 - case PTHREAD_MUTEX_TIMED_NP: - case PTHREAD_MUTEX_ADAPTIVE_NP: -# endif -# ifdef GLIBC_2_1 - case PTHREAD_MUTEX_FAST_NP: -# endif - case PTHREAD_MUTEX_RECURSIVE_NP: - case PTHREAD_MUTEX_ERRORCHECK_NP: - if (mutex->__m_count >= 0) break; - /* else fall thru */ - default: - VG_(record_pthread_err)( tid, - "pthread_cond_wait/timedwait: mutex is invalid"); - SET_EDX(tid, EINVAL); - return; - } - - /* Barf if we don't currently hold the mutex. */ - if (mutex->__m_count == 0 /* nobody holds it */ - || (ThreadId)mutex->__m_owner != tid /* we don't hold it */) { - VG_(record_pthread_err)( tid, - "pthread_cond_wait/timedwait: mutex is unlocked " - "or is locked but not owned by thread"); - SET_EDX(tid, EINVAL); - return; - } - - /* Queue ourselves on the condition. */ - VG_(threads)[tid].status = VgTs_WaitCV; - VG_(threads)[tid].associated_cv = cond; - VG_(threads)[tid].associated_mx = mutex; - VG_(threads)[tid].awaken_at = ms_end; - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "pthread_cond_wait cv %p, mx %p: BLOCK", - cond, mutex ); - print_pthread_event(tid, msg_buf); - } - - /* Release the mutex. */ - release_one_thread_waiting_on_mutex ( mutex, "pthread_cond_wait " ); -} - - -static -void do_pthread_cond_signal_or_broadcast ( ThreadId tid, - Bool broadcast, - pthread_cond_t *cond ) -{ - Char msg_buf[100]; - Char* caller - = broadcast ? "pthread_cond_broadcast" - : "pthread_cond_signal "; - - if (VG_(clo_trace_pthread_level) >= 2) { - VG_(sprintf)(msg_buf, "%s cv %p ...", - caller, cond ); - print_pthread_event(tid, msg_buf); - } - - /* Paranoia ... */ - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (cond == NULL) { - VG_(record_pthread_err)( tid, - "pthread_cond_signal/broadcast: cond is NULL"); - SET_EDX(tid, EINVAL); - return; - } - - release_N_threads_waiting_on_cond ( - cond, - broadcast ? 
VG_N_THREADS : 1, - caller - ); - - SET_EDX(tid, 0); /* success */ -} - - -/* ----------------------------------------------------------- - THREAD SPECIFIC DATA - -------------------------------------------------------- */ - -static __inline__ -Bool is_valid_key ( ThreadKey k ) -{ - /* k unsigned; hence no < 0 check */ - if (k >= VG_N_THREAD_KEYS) return False; - if (!vg_thread_keys[k].inuse) return False; - return True; -} - -static -void do_pthread_key_create ( ThreadId tid, - pthread_key_t* key, - void (*destructor)(void*) ) -{ - Int i; - Char msg_buf[100]; - - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "pthread_key_create *key %p, destr %p", - key, destructor ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(sizeof(pthread_key_t) == sizeof(ThreadKey)); - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - for (i = 0; i < VG_N_THREAD_KEYS; i++) - if (!vg_thread_keys[i].inuse) - break; - - if (i == VG_N_THREAD_KEYS) { - /* SET_EDX(tid, EAGAIN); - return; - */ - VG_(panic)("pthread_key_create: VG_N_THREAD_KEYS is too low;" - " increase and recompile"); - } - - vg_thread_keys[i].inuse = True; - vg_thread_keys[i].destructor = destructor; - - /* check key for addressibility */ - if (VG_(clo_instrument) - && !VGM_(check_writable)( (Addr)key, - sizeof(pthread_key_t), NULL)) - VG_(record_pthread_err)( tid, - "pthread_key_create: key points to invalid location"); - *key = i; - if (VG_(clo_instrument)) - VGM_(make_readable)( (Addr)key, sizeof(pthread_key_t) ); - - SET_EDX(tid, 0); -} - - -static -void do_pthread_key_delete ( ThreadId tid, pthread_key_t key ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "pthread_key_delete key %d", - key ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (!is_valid_key(key)) { - VG_(record_pthread_err)( tid, - "pthread_key_delete: key is 
invalid"); - SET_EDX(tid, EINVAL); - return; - } - - vg_thread_keys[key].inuse = False; - - /* Optional. We're not required to do this, although it shouldn't - make any difference to programs which use the key/specifics - functions correctly. */ -# if 1 - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status != VgTs_Empty) - VG_(threads)[tid].specifics[key] = NULL; - } -# endif -} - - -static -void do_pthread_getspecific ( ThreadId tid, pthread_key_t key ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "pthread_getspecific key %d", - key ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (!is_valid_key(key)) { - VG_(record_pthread_err)( tid, - "pthread_getspecific: key is invalid"); - SET_EDX(tid, (UInt)NULL); - return; - } - - SET_EDX(tid, (UInt)VG_(threads)[tid].specifics[key]); -} - - -static -void do_pthread_setspecific ( ThreadId tid, - pthread_key_t key, - void *pointer ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, "pthread_setspecific key %d, ptr %p", - key, pointer ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (!is_valid_key(key)) { - VG_(record_pthread_err)( tid, - "pthread_setspecific: key is invalid"); - SET_EDX(tid, EINVAL); - return; - } - - VG_(threads)[tid].specifics[key] = pointer; - SET_EDX(tid, 0); -} - - -/* Helper for calling destructors at thread exit. If key is valid, - copy the thread's specific value into cu->arg and put the *key*'s - destructor fn address in cu->fn. Then return 0 to the caller. - Otherwise return non-zero to the caller. 
*/ -static -void do__get_key_destr_and_spec ( ThreadId tid, - pthread_key_t key, - CleanupEntry* cu ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "get_key_destr_and_arg (key = %d)", key ); - print_pthread_event(tid, msg_buf); - } - vg_assert(VG_(is_valid_tid)(tid)); - vg_assert(key >= 0 && key < VG_N_THREAD_KEYS); - if (!vg_thread_keys[key].inuse) { - SET_EDX(tid, -1); - return; - } - cu->fn = vg_thread_keys[key].destructor; - cu->arg = VG_(threads)[tid].specifics[key]; - if (VG_(clo_instrument)) - VGM_(make_readable)( (Addr)cu, sizeof(CleanupEntry) ); - SET_EDX(tid, 0); -} - - -/* --------------------------------------------------- - SIGNALS - ------------------------------------------------ */ - -/* See comment in vg_libthread.c:pthread_sigmask() regarding - deliberate confusion of types sigset_t and vki_sigset_t. Return 0 - for OK and 1 for some kind of addressing error, which the - vg_libpthread.c routine turns into return values 0 and EFAULT - respectively. 
*/ -static -void do_pthread_sigmask ( ThreadId tid, - Int vki_how, - vki_ksigset_t* newmask, - vki_ksigset_t* oldmask ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "pthread_sigmask vki_how %d, newmask %p, oldmask %p", - vki_how, newmask, oldmask ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (VG_(clo_instrument)) { - /* check newmask/oldmask are addressible/defined */ - if (newmask - && !VGM_(check_readable)( (Addr)newmask, - sizeof(vki_ksigset_t), NULL)) - VG_(record_pthread_err)( tid, - "pthread_sigmask: newmask contains " - "unaddressible or undefined bytes"); - if (oldmask - && !VGM_(check_writable)( (Addr)oldmask, - sizeof(vki_ksigset_t), NULL)) - VG_(record_pthread_err)( tid, - "pthread_sigmask: oldmask contains " - "unaddressible bytes"); - } - - VG_(do_pthread_sigmask_SCSS_upd) ( tid, vki_how, newmask, oldmask ); - - if (oldmask && VG_(clo_instrument)) { - VGM_(make_readable)( (Addr)oldmask, sizeof(vki_ksigset_t) ); - } - - /* Success. 
*/ - SET_EDX(tid, 0); -} - - -static -void do_sigwait ( ThreadId tid, - vki_ksigset_t* set, - Int* sig ) -{ - vki_ksigset_t irrelevant_sigmask; - Char msg_buf[100]; - - if (VG_(clo_trace_signals) || VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, - "suspend due to sigwait(): set %p, sig %p", - set, sig ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - /* Change SCSS */ - VG_(threads)[tid].sigs_waited_for = *set; - VG_(threads)[tid].status = VgTs_WaitSIG; - - VG_(block_all_host_signals)( &irrelevant_sigmask ); - VG_(handle_SCSS_change)( False /* lazy update */ ); -} - - -static -void do_pthread_kill ( ThreadId tid, /* me */ - ThreadId thread, /* thread to signal */ - Int sig ) -{ - Char msg_buf[100]; - - if (VG_(clo_trace_signals) || VG_(clo_trace_pthread_level) >= 1) { - VG_(sprintf)(msg_buf, - "pthread_kill thread %d, signo %d", - thread, sig ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (!VG_(is_valid_tid)(thread)) { - VG_(record_pthread_err)( tid, - "pthread_kill: invalid target thread"); - SET_EDX(tid, -VKI_ESRCH); - return; - } - - if (sig < 1 || sig > VKI_KNSIG) { - SET_EDX(tid, -VKI_EINVAL); - return; - } - - VG_(send_signal_to_thread)( thread, sig ); - SET_EDX(tid, 0); -} - - -/* ----------------------------------------------------------- - FORK HANDLERS. 
- -------------------------------------------------------- */ - -static -void do__set_fhstack_used ( ThreadId tid, Int n ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "set_fhstack_used to %d", n ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (n >= 0 && n < VG_N_FORKHANDLERSTACK) { - vg_fhstack_used = n; - SET_EDX(tid, 0); - } else { - SET_EDX(tid, -1); - } -} - - -static -void do__get_fhstack_used ( ThreadId tid ) -{ - Int n; - Char msg_buf[100]; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "get_fhstack_used" ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - n = vg_fhstack_used; - vg_assert(n >= 0 && n < VG_N_FORKHANDLERSTACK); - SET_EDX(tid, n); -} - -static -void do__set_fhstack_entry ( ThreadId tid, Int n, ForkHandlerEntry* fh ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "set_fhstack_entry %d to %p", n, fh ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (VG_(clo_instrument)) { - /* check fh is addressible/defined */ - if (!VGM_(check_readable)( (Addr)fh, - sizeof(ForkHandlerEntry), NULL)) { - VG_(record_pthread_err)( tid, - "pthread_atfork: prepare/parent/child contains " - "unaddressible or undefined bytes"); - } - } - - if (n < 0 && n >= VG_N_FORKHANDLERSTACK) { - SET_EDX(tid, -1); - return; - } - - vg_fhstack[n] = *fh; - SET_EDX(tid, 0); -} - - -static -void do__get_fhstack_entry ( ThreadId tid, Int n, /*OUT*/ - ForkHandlerEntry* fh ) -{ - Char msg_buf[100]; - if (VG_(clo_trace_sched)) { - VG_(sprintf)(msg_buf, "get_fhstack_entry %d", n ); - print_pthread_event(tid, msg_buf); - } - - vg_assert(VG_(is_valid_tid)(tid) - && VG_(threads)[tid].status == VgTs_Runnable); - - if (VG_(clo_instrument)) { - /* check fh is 
addressible/defined */ - if (!VGM_(check_writable)( (Addr)fh, - sizeof(ForkHandlerEntry), NULL)) { - VG_(record_pthread_err)( tid, - "fork: prepare/parent/child contains " - "unaddressible bytes"); - } - } - - if (n < 0 && n >= VG_N_FORKHANDLERSTACK) { - SET_EDX(tid, -1); - return; - } - - *fh = vg_fhstack[n]; - SET_EDX(tid, 0); - - if (VG_(clo_instrument)) { - VGM_(make_readable)( (Addr)fh, sizeof(ForkHandlerEntry) ); - } -} - - -/* --------------------------------------------------------------------- - Handle client requests. - ------------------------------------------------------------------ */ - -/* Do a client request for the thread tid. After the request, tid may - or may not still be runnable; if not, the scheduler will have to - choose a new thread to run. -*/ -static -void do_client_request ( ThreadId tid ) -{ -# define RETURN_WITH(vvv) \ - { tst->m_edx = (vvv); \ - tst->sh_edx = VGM_WORD_VALID; \ - } - - ThreadState* tst = &VG_(threads)[tid]; - UInt* arg = (UInt*)(VG_(threads)[tid].m_eax); - UInt req_no = arg[0]; - - /* VG_(printf)("req no = 0x%x\n", req_no); */ - switch (req_no) { - - case VG_USERREQ__MALLOC: - RETURN_WITH( - (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocMalloc ) - ); - break; - - case VG_USERREQ__BUILTIN_NEW: - RETURN_WITH( - (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNew ) - ); - break; - - case VG_USERREQ__BUILTIN_VEC_NEW: - RETURN_WITH( - (UInt)VG_(client_malloc) ( tst, arg[1], Vg_AllocNewVec ) - ); - break; - - case VG_USERREQ__FREE: - VG_(client_free) ( tst, (void*)arg[1], Vg_AllocMalloc ); - RETURN_WITH(0); /* irrelevant */ - break; - - case VG_USERREQ__BUILTIN_DELETE: - VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNew ); - RETURN_WITH(0); /* irrelevant */ - break; - - case VG_USERREQ__BUILTIN_VEC_DELETE: - VG_(client_free) ( tst, (void*)arg[1], Vg_AllocNewVec ); - RETURN_WITH(0); /* irrelevant */ - break; - - case VG_USERREQ__CALLOC: - RETURN_WITH( - (UInt)VG_(client_calloc) ( tst, arg[1], arg[2] ) - ); - break; - - 
case VG_USERREQ__REALLOC: - RETURN_WITH( - (UInt)VG_(client_realloc) ( tst, (void*)arg[1], arg[2] ) - ); - break; - - case VG_USERREQ__MEMALIGN: - RETURN_WITH( - (UInt)VG_(client_memalign) ( tst, arg[1], arg[2] ) - ); - break; - - case VG_USERREQ__PTHREAD_GET_THREADID: - RETURN_WITH(tid); - break; - - case VG_USERREQ__RUNNING_ON_VALGRIND: - RETURN_WITH(1); - break; - - case VG_USERREQ__GET_PTHREAD_TRACE_LEVEL: - RETURN_WITH(VG_(clo_trace_pthread_level)); - break; - - case VG_USERREQ__READ_MILLISECOND_TIMER: - RETURN_WITH(VG_(read_millisecond_timer)()); - break; - - /* Some of these may make thread tid non-runnable, but the - scheduler checks for that on return from this function. */ - case VG_USERREQ__PTHREAD_MUTEX_LOCK: - do_pthread_mutex_lock( tid, False, (void *)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_MUTEX_TRYLOCK: - do_pthread_mutex_lock( tid, True, (void *)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_MUTEX_UNLOCK: - do_pthread_mutex_unlock( tid, (void *)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_GETSPECIFIC: - do_pthread_getspecific ( tid, (UInt)(arg[1]) ); - break; - - case VG_USERREQ__SET_CANCELTYPE: - do__set_canceltype ( tid, arg[1] ); - break; - - case VG_USERREQ__CLEANUP_PUSH: - do__cleanup_push ( tid, (CleanupEntry*)(arg[1]) ); - break; - - case VG_USERREQ__CLEANUP_POP: - do__cleanup_pop ( tid, (CleanupEntry*)(arg[1]) ); - break; - - case VG_USERREQ__TESTCANCEL: - do__testcancel ( tid ); - break; - - case VG_USERREQ__GET_N_SIGS_RETURNED: - RETURN_WITH(VG_(threads)[tid].n_signals_returned); - break; - - case VG_USERREQ__PTHREAD_JOIN: - do_pthread_join( tid, arg[1], (void**)(arg[2]) ); - break; - - case VG_USERREQ__PTHREAD_COND_WAIT: - do_pthread_cond_wait( tid, - (pthread_cond_t *)(arg[1]), - (pthread_mutex_t *)(arg[2]), - 0xFFFFFFFF /* no timeout */ ); - break; - - case VG_USERREQ__PTHREAD_COND_TIMEDWAIT: - do_pthread_cond_wait( tid, - (pthread_cond_t *)(arg[1]), - (pthread_mutex_t *)(arg[2]), - arg[3] /* timeout millisecond point 
*/ ); - break; - - case VG_USERREQ__PTHREAD_COND_SIGNAL: - do_pthread_cond_signal_or_broadcast( - tid, - False, /* signal, not broadcast */ - (pthread_cond_t *)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_COND_BROADCAST: - do_pthread_cond_signal_or_broadcast( - tid, - True, /* broadcast, not signal */ - (pthread_cond_t *)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_KEY_CREATE: - do_pthread_key_create ( tid, - (pthread_key_t*)(arg[1]), - (void(*)(void*))(arg[2]) ); - break; - - case VG_USERREQ__PTHREAD_KEY_DELETE: - do_pthread_key_delete ( tid, - (pthread_key_t)(arg[1]) ); - break; - - case VG_USERREQ__PTHREAD_SETSPECIFIC: - do_pthread_setspecific ( tid, - (pthread_key_t)(arg[1]), - (void*)(arg[2]) ); - break; - - case VG_USERREQ__PTHREAD_SIGMASK: - do_pthread_sigmask ( tid, - arg[1], - (vki_ksigset_t*)(arg[2]), - (vki_ksigset_t*)(arg[3]) ); - break; - - case VG_USERREQ__SIGWAIT: - do_sigwait ( tid, - (vki_ksigset_t*)(arg[1]), - (Int*)(arg[2]) ); - break; - - case VG_USERREQ__PTHREAD_KILL: - do_pthread_kill ( tid, arg[1], arg[2] ); - break; - - case VG_USERREQ__PTHREAD_YIELD: - do_pthread_yield ( tid ); - /* On return from do_client_request(), the scheduler will - select a new thread to run. 
*/ - break; - - case VG_USERREQ__SET_CANCELSTATE: - do__set_cancelstate ( tid, arg[1] ); - break; - - case VG_USERREQ__SET_OR_GET_DETACH: - do__set_or_get_detach ( tid, arg[1], arg[2] ); - break; - - case VG_USERREQ__SET_CANCELPEND: - do__set_cancelpend ( tid, arg[1], (void(*)(void*))arg[2] ); - break; - - case VG_USERREQ__WAIT_JOINER: - do__wait_joiner ( tid, (void*)arg[1] ); - break; - - case VG_USERREQ__QUIT: - do__quit ( tid ); - break; - - case VG_USERREQ__APPLY_IN_NEW_THREAD: - do__apply_in_new_thread ( tid, (void*(*)(void*))arg[1], - (void*)arg[2] ); - break; - - case VG_USERREQ__GET_KEY_D_AND_S: - do__get_key_destr_and_spec ( tid, - (pthread_key_t)arg[1], - (CleanupEntry*)arg[2] ); - break; - - case VG_USERREQ__NUKE_OTHER_THREADS: - VG_(nuke_all_threads_except) ( tid ); - SET_EDX(tid, 0); - break; - - case VG_USERREQ__PTHREAD_ERROR: - VG_(record_pthread_err)( tid, (Char*)(arg[1]) ); - SET_EDX(tid, 0); - break; - - case VG_USERREQ__SET_FHSTACK_USED: - do__set_fhstack_used( tid, (Int)(arg[1]) ); - break; - - case VG_USERREQ__GET_FHSTACK_USED: - do__get_fhstack_used( tid ); - break; - - case VG_USERREQ__SET_FHSTACK_ENTRY: - do__set_fhstack_entry( tid, (Int)(arg[1]), - (ForkHandlerEntry*)(arg[2]) ); - break; - - case VG_USERREQ__GET_FHSTACK_ENTRY: - do__get_fhstack_entry( tid, (Int)(arg[1]), - (ForkHandlerEntry*)(arg[2]) ); - break; - - case VG_USERREQ__MAKE_NOACCESS: - case VG_USERREQ__MAKE_WRITABLE: - case VG_USERREQ__MAKE_READABLE: - case VG_USERREQ__DISCARD: - case VG_USERREQ__CHECK_WRITABLE: - case VG_USERREQ__CHECK_READABLE: - case VG_USERREQ__MAKE_NOACCESS_STACK: - case VG_USERREQ__DO_LEAK_CHECK: - case VG_USERREQ__DISCARD_TRANSLATIONS: - SET_EDX( - tid, - VG_(handle_client_request) ( &VG_(threads)[tid], arg ) - ); - break; - - case VG_USERREQ__SIGNAL_RETURNS: - handle_signal_return(tid); - break; - - default: - VG_(printf)("panic'd on client request = 0x%x\n", arg[0] ); - VG_(panic)("do_client_request: " - "unknown request"); - /*NOTREACHED*/ - break; - 
} - -# undef RETURN_WITH -} - - -/* --------------------------------------------------------------------- - Sanity checking. - ------------------------------------------------------------------ */ - -/* Internal consistency checks on the sched/pthread structures. */ -static -void scheduler_sanity ( void ) -{ - pthread_mutex_t* mx; - pthread_cond_t* cv; - Int i; - - /* VG_(printf)("scheduler_sanity\n"); */ - for (i = 1; i < VG_N_THREADS; i++) { - mx = VG_(threads)[i].associated_mx; - cv = VG_(threads)[i].associated_cv; - if (VG_(threads)[i].status == VgTs_WaitMX) { - /* If we're waiting on a MX: (1) the mx is not null, (2, 3) - it's actually held by someone, since otherwise this thread - is deadlocked, (4) the mutex's owner is not us, since - otherwise this thread is also deadlocked. The logic in - do_pthread_mutex_lock rejects attempts by a thread to lock - a (non-recursive) mutex which it already owns. - - (2) has been seen to fail sometimes. I don't know why. - Possibly to do with signals. */ - vg_assert(cv == NULL); - /* 1 */ vg_assert(mx != NULL); - /* 2 */ vg_assert(mx->__m_count > 0); - /* 3 */ vg_assert(VG_(is_valid_tid)((ThreadId)mx->__m_owner)); - /* 4 */ vg_assert(i != (ThreadId)mx->__m_owner); - } else - if (VG_(threads)[i].status == VgTs_WaitCV) { - vg_assert(cv != NULL); - vg_assert(mx != NULL); - } else { - /* Unfortunately these don't hold true when a sighandler is - running. To be fixed. */ - /* vg_assert(cv == NULL); */ - /* vg_assert(mx == NULL); */ - } - - if (VG_(threads)[i].status != VgTs_Empty) { - Int - stack_used = (Addr)VG_(threads)[i].stack_highest_word - - (Addr)VG_(threads)[i].m_esp; - if (i > 1 /* not the root thread */ - && stack_used - >= (VG_PTHREAD_STACK_MIN - 1000 /* paranoia */)) { - VG_(message)(Vg_UserMsg, - "Warning: STACK OVERFLOW: " - "thread %d: stack used %d, available %d", - i, stack_used, VG_PTHREAD_STACK_MIN ); - VG_(message)(Vg_UserMsg, - "Terminating Valgrind. 
If thread(s) " - "really need more stack, increase"); - VG_(message)(Vg_UserMsg, - "VG_PTHREAD_STACK_SIZE in vg_include.h and recompile."); - VG_(exit)(1); - } - - if (VG_(threads)[i].status == VgTs_WaitSIG) { - vg_assert( ! VG_(kisemptysigset)( - & VG_(threads)[i].sigs_waited_for) ); - } else { - vg_assert( VG_(kisemptysigset)( - & VG_(threads)[i].sigs_waited_for) ); - } - - } - } - - for (i = 0; i < VG_N_THREAD_KEYS; i++) { - if (!vg_thread_keys[i].inuse) - vg_assert(vg_thread_keys[i].destructor == NULL); - } -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_scheduler.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_signals.c b/coregrind/vg_signals.c deleted file mode 100644 index f58ec11e96..0000000000 --- a/coregrind/vg_signals.c +++ /dev/null @@ -1,1531 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Implementation of POSIX signals. ---*/ -/*--- vg_signals.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. 
- - The GNU General Public License is contained in the file LICENSE. -*/ - - -#include "vg_include.h" -#include "vg_constants.h" -#include "vg_unsafe.h" -#include "valgrind.h" /* for VALGRIND_MAGIC_SEQUENCE */ - -/* Define to give more sanity checking for signals. */ -#define DEBUG_SIGNALS - - -/* KNOWN BUGS 24 May 02: - - - A signal is not masked in its own handler. Neither are the - signals in the signal's blocking mask. - - - There is only one pending set for the entire process, whereas - POSIX seems to require each thread have its own pending set. - This means that a signal can only be pending for one thread at - a time. - - - The following causes an infinite loop: start Hugs, Feb 2001 - version, and do Control-C at the prompt. There is an infinite - series of sigints delivered (to the client); but also seemingly - to valgrind, which is very strange. I don't know why. - - Probably a lot more bugs which I haven't discovered yet. -*/ - - -/* --------------------------------------------------------------------- - Forwards decls. - ------------------------------------------------------------------ */ - -static void vg_oursignalhandler ( Int sigNo ); - - -/* --------------------------------------------------------------------- - HIGH LEVEL STUFF TO DO WITH SIGNALS: POLICY (MOSTLY) - ------------------------------------------------------------------ */ - -/* --------------------------------------------------------------------- - Signal state for this process. - ------------------------------------------------------------------ */ - - -/* Base-ment of these arrays[VKI_KNSIG]. - - Valid signal numbers are 1 .. VKI_KNSIG inclusive. - Rather than subtracting 1 for indexing these arrays, which - is tedious and error-prone, they are simply dimensioned 1 larger, - and entry [0] is not used. - */ - - -/* ----------------------------------------------------- - Static client signal state (SCSS). This is the state - that the client thinks it has the kernel in. 
- SCSS records verbatim the client's settings. These - are mashed around only when SKSS is calculated from it. - -------------------------------------------------- */ - -typedef - struct { - void* scss_handler; /* VKI_SIG_DFL or VKI_SIG_IGN or ptr to - client's handler */ - UInt scss_flags; - vki_ksigset_t scss_mask; - void* scss_restorer; /* god knows; we ignore it. */ - } - SCSS_Per_Signal; - -typedef - struct { - /* per-signal info */ - SCSS_Per_Signal scss_per_sig[1+VKI_KNSIG]; - - /* Signal delivery stack, if any. */ - vki_kstack_t altstack; - - /* Additional elements to SCSS not stored here: - - for each thread, the thread's blocking mask - - for each thread in WaitSIG, the set of waited-on sigs - */ - } - SCSS; - -static SCSS vg_scss; - - -/* ----------------------------------------------------- - Static kernel signal state (SKSS). This is the state - that we have the kernel in. It is computed from SCSS. - -------------------------------------------------- */ - -/* Let's do: - sigprocmask assigns to all thread masks - so that at least everything is always consistent - Flags: - SA_NOCLDSTOP -- passed to kernel - SA_ONESHOT or SA_RESETHAND -- required; abort if not set - SA_RESTART -- we observe this but set our handlers always to restart - SA_NOMASK or SA_NODEFER -- required to not be set; abort if set - SA_ONSTACK -- currently not supported; abort if set. -*/ - - -typedef - struct { - void* skss_handler; /* VKI_SIG_DFL or VKI_SIG_IGN - or ptr to our handler */ - UInt skss_flags; - /* There is no skss_mask, since we know that we will always ask - for all signals to be blocked in our one-and-only - sighandler. */ - /* Also there is no skss_restorer. */ - } - SKSS_Per_Signal; - -typedef - struct { - SKSS_Per_Signal skss_per_sig[1+VKI_KNSIG]; - vki_ksigset_t skss_sigmask; /* process' blocked signal mask */ - } - SKSS; - -static SKSS vg_skss; - - -/* ----------------------------------------------------- - Dynamic client signal state (DCSS). 
This holds transient - information about state of client signals. - -------------------------------------------------- */ - -typedef - struct { - /* True iff a signal has been received but not yet passed to - client. */ - Bool dcss_sigpending[1+VKI_KNSIG]; - /* If sigpending[] is True, has meaning: - VG_INVALID_THREADID -- to be passed to any suitable thread - other -- to be passed only to the specified thread. */ - ThreadId dcss_destthread[1+VKI_KNSIG]; - } - DCSS; - -static DCSS vg_dcss; - - -/* --------------------------------------------------------------------- - Compute the SKSS required by the current SCSS. - ------------------------------------------------------------------ */ - -static -void pp_SKSS ( void ) -{ - Int sig; - VG_(printf)("\n\nSKSS:\n"); - for (sig = 1; sig <= VKI_KNSIG; sig++) { - VG_(printf)("sig %d: handler 0x%x, flags 0x%x\n", sig, - vg_skss.skss_per_sig[sig].skss_handler, - vg_skss.skss_per_sig[sig].skss_flags ); - - } - VG_(printf)("Global sigmask (63 .. 0) = 0x%x 0x%x\n", - vg_skss.skss_sigmask.ws[1], - vg_skss.skss_sigmask.ws[0] ); -} - -static __inline__ -Bool is_WaitSIGd_by_any_thread ( Int sig ) -{ - ThreadId tid; - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status != VgTs_WaitSIG) - continue; - if (VG_(ksigismember)( &VG_(threads)[tid].sigs_waited_for, sig )) - return True; - } - return False; -} - -static __inline__ -Bool is_blocked_by_all_threads ( Int sig ) -{ - ThreadId tid; - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status == VgTs_Empty) - continue; - if (! VG_(ksigismember)( &VG_(threads)[tid].sig_mask, sig )) - return False; - } - return True; -} - - -/* This is the core, clever bit. 
Computation is as follows: - - For each signal - handler = if client has a handler, then our handler - else if is WaitSIG'd by any thread, then our handler - else if client is DFL, then DFL - else (client must be IGN) IGN - - blocked = if is blocked by all threads and not WaitSIG'd by - any thread - then BLOCKED - else UNBLOCKED -*/ -static -void calculate_SKSS_from_SCSS ( SKSS* dst ) -{ - Int sig; - void* skss_handler; - void* scss_handler; - Bool iz_WaitSIGd_by_any_thread; - Bool iz_blocked_by_all_threads; - Bool skss_blocked; - UInt scss_flags; - UInt skss_flags; - - VG_(ksigemptyset)( &dst->skss_sigmask ); - - for (sig = 1; sig <= VKI_KNSIG; sig++) { - - /* Calculate kernel handler and blockedness for sig, as per rules - in above comment. */ - - iz_WaitSIGd_by_any_thread = is_WaitSIGd_by_any_thread(sig); - iz_blocked_by_all_threads = is_blocked_by_all_threads(sig); - - scss_handler = vg_scss.scss_per_sig[sig].scss_handler; - scss_flags = vg_scss.scss_per_sig[sig].scss_flags; - - /* Restorer */ - /* - Doesn't seem like we can spin this one. 
- if (vg_scss.scss_per_sig[sig].scss_restorer != NULL) - VG_(unimplemented) - ("sigactions with non-NULL .sa_restorer field"); - */ - - /* Handler */ - - if (scss_handler != VKI_SIG_DFL && scss_handler != VKI_SIG_IGN) { - skss_handler = &vg_oursignalhandler; - } else - if (iz_WaitSIGd_by_any_thread) { - skss_handler = &vg_oursignalhandler; - } else - if (scss_handler == VKI_SIG_DFL) { - skss_handler = VKI_SIG_DFL; - } - else { - vg_assert(scss_handler == VKI_SIG_IGN); - skss_handler = VKI_SIG_IGN; - } - - /* Blockfulness */ - - skss_blocked - = iz_blocked_by_all_threads && !iz_WaitSIGd_by_any_thread; - - /* Flags */ - - skss_flags = 0; - /* SA_NOCLDSTOP: pass to kernel */ - if (scss_flags & VKI_SA_NOCLDSTOP) - skss_flags |= VKI_SA_NOCLDSTOP; - /* SA_ONESHOT: ignore client setting */ - /* - if (!(scss_flags & VKI_SA_ONESHOT)) - VG_(unimplemented) - ("sigactions without SA_ONESHOT"); - vg_assert(scss_flags & VKI_SA_ONESHOT); - skss_flags |= VKI_SA_ONESHOT; - */ - /* SA_RESTART: ignore client setting and set for us */ - skss_flags |= VKI_SA_RESTART; - /* SA_NOMASK: not allowed */ - /* - .. well, ignore it. - if (scss_flags & VKI_SA_NOMASK) - VG_(unimplemented) - ("sigactions with SA_NOMASK"); - vg_assert(!(scss_flags & VKI_SA_NOMASK)); - */ - /* SA_ONSTACK: client setting is irrelevant here */ - /* - if (scss_flags & VKI_SA_ONSTACK) - VG_(unimplemented) - ("signals on an alternative stack (SA_ONSTACK)"); - vg_assert(!(scss_flags & VKI_SA_ONSTACK)); - */ - /* ... but WE ask for on-stack ourselves ... */ - skss_flags |= VKI_SA_ONSTACK; - - /* Create SKSS entry for this signal. */ - - if (skss_blocked - && sig != VKI_SIGKILL && sig != VKI_SIGSTOP) - VG_(ksigaddset)( &dst->skss_sigmask, sig ); - - if (sig != VKI_SIGKILL && sig != VKI_SIGSTOP) - dst->skss_per_sig[sig].skss_handler = skss_handler; - else - dst->skss_per_sig[sig].skss_handler = VKI_SIG_DFL; - - dst->skss_per_sig[sig].skss_flags = skss_flags; - } - - /* Sanity checks. 
*/ - vg_assert(dst->skss_per_sig[VKI_SIGKILL].skss_handler - == VKI_SIG_DFL); - vg_assert(dst->skss_per_sig[VKI_SIGSTOP].skss_handler - == VKI_SIG_DFL); - vg_assert(!VG_(ksigismember)( &dst->skss_sigmask, VKI_SIGKILL )); - vg_assert(!VG_(ksigismember)( &dst->skss_sigmask, VKI_SIGSTOP )); - - if (0) - pp_SKSS(); -} - - -/* --------------------------------------------------------------------- - After a possible SCSS change, update SKSS and the kernel itself. - ------------------------------------------------------------------ */ - -/* IMPORTANT NOTE: to avoid race conditions, we must always enter here - with ALL KERNEL SIGNALS BLOCKED ! -*/ -void VG_(handle_SCSS_change) ( Bool force_update ) -{ - Int res, sig; - SKSS skss_old; - vki_ksigaction ksa, ksa_old; - -# ifdef DEBUG_SIGNALS - vki_ksigset_t test_sigmask; - res = VG_(ksigprocmask)( VKI_SIG_SETMASK /*irrelevant*/, - NULL, &test_sigmask ); - vg_assert(res == 0); - /* The kernel never says that SIGKILL or SIGSTOP are masked. It is - correct! So we fake it here for the purposes only of - assertion. */ - VG_(ksigaddset)( &test_sigmask, VKI_SIGKILL ); - VG_(ksigaddset)( &test_sigmask, VKI_SIGSTOP ); - vg_assert(VG_(kisfullsigset)( &test_sigmask )); -# endif - - /* Remember old SKSS and calculate new one. */ - skss_old = vg_skss; - calculate_SKSS_from_SCSS ( &vg_skss ); - - /* Compare the new SKSS entries vs the old ones, and update kernel - where they differ. */ - for (sig = 1; sig <= VKI_KNSIG; sig++) { - - /* Trying to do anything with SIGKILL is pointless; just ignore - it. */ - if (sig == VKI_SIGKILL || sig == VKI_SIGSTOP) - continue; - - /* Aside: take the opportunity to clean up DCSS: forget about any - pending signals directed at dead threads. 
*/ - if (vg_dcss.dcss_sigpending[sig] - && vg_dcss.dcss_destthread[sig] != VG_INVALID_THREADID) { - ThreadId tid = vg_dcss.dcss_destthread[sig]; - vg_assert(VG_(is_valid_or_empty_tid)(tid)); - if (VG_(threads)[tid].status == VgTs_Empty) { - vg_dcss.dcss_sigpending[sig] = False; - vg_dcss.dcss_destthread[sig] = VG_INVALID_THREADID; - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "discarding pending signal %d due to thread %d exiting", - sig, tid ); - } - } - - /* End of the Aside. Now the Main Business. */ - - if (!force_update) { - if ((skss_old.skss_per_sig[sig].skss_handler - == vg_skss.skss_per_sig[sig].skss_handler) - && (skss_old.skss_per_sig[sig].skss_flags - == vg_skss.skss_per_sig[sig].skss_flags)) - /* no difference */ - continue; - } - - ksa.ksa_handler = vg_skss.skss_per_sig[sig].skss_handler; - ksa.ksa_flags = vg_skss.skss_per_sig[sig].skss_flags; - vg_assert(ksa.ksa_flags & VKI_SA_ONSTACK); - VG_(ksigfillset)( &ksa.ksa_mask ); - VG_(ksigdelset)( &ksa.ksa_mask, VKI_SIGKILL ); - VG_(ksigdelset)( &ksa.ksa_mask, VKI_SIGSTOP ); - ksa.ksa_restorer = NULL; - - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "setting ksig %d to: hdlr 0x%x, flags 0x%x, " - "mask(63..0) 0x%x 0x%x", - sig, ksa.ksa_handler, - ksa.ksa_flags, - ksa.ksa_mask.ws[1], - ksa.ksa_mask.ws[0] - ); - - res = VG_(ksigaction)( sig, &ksa, &ksa_old ); - vg_assert(res == 0); - - /* Since we got the old sigaction more or less for free, might - as well extract the maximum sanity-check value from it. 
*/ - if (!force_update) { - vg_assert(ksa_old.ksa_handler - == skss_old.skss_per_sig[sig].skss_handler); - vg_assert(ksa_old.ksa_flags - == skss_old.skss_per_sig[sig].skss_flags); - vg_assert(ksa_old.ksa_restorer - == NULL); - VG_(ksigaddset)( &ksa_old.ksa_mask, VKI_SIGKILL ); - VG_(ksigaddset)( &ksa_old.ksa_mask, VKI_SIGSTOP ); - vg_assert(VG_(kisfullsigset)( &ksa_old.ksa_mask )); - } - } - - /* Just set the new sigmask, even if it's no different from the - old, since we have to do this anyway, to unblock the host - signals. */ - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "setting kmask(63..0) to 0x%x 0x%x", - vg_skss.skss_sigmask.ws[1], - vg_skss.skss_sigmask.ws[0] - ); - - VG_(restore_all_host_signals)( &vg_skss.skss_sigmask ); -} - - -/* --------------------------------------------------------------------- - Update/query SCSS in accordance with client requests. - ------------------------------------------------------------------ */ - -/* Logic for this alt-stack stuff copied directly from do_sigaltstack - in kernel/signal.[ch] */ - -/* True if we are on the alternate signal stack. */ -static Int on_sig_stack ( Addr m_esp ) -{ - return (m_esp - (Addr)vg_scss.altstack.ss_sp - < vg_scss.altstack.ss_size); -} - -static Int sas_ss_flags ( Addr m_esp ) -{ - return (vg_scss.altstack.ss_size == 0 - ? VKI_SS_DISABLE - : on_sig_stack(m_esp) ? 
VKI_SS_ONSTACK : 0); -} - - -void VG_(do__NR_sigaltstack) ( ThreadId tid ) -{ - vki_kstack_t* ss; - vki_kstack_t* oss; - Addr m_esp; - - vg_assert(VG_(is_valid_tid)(tid)); - ss = (vki_kstack_t*)(VG_(threads)[tid].m_ebx); - oss = (vki_kstack_t*)(VG_(threads)[tid].m_ecx); - m_esp = VG_(threads)[tid].m_esp; - - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugExtraMsg, - "__NR_sigaltstack: tid %d, " - "ss 0x%x, oss 0x%x (current %%esp %p)", - tid, (UInt)ss, (UInt)oss, (UInt)m_esp ); - - if (oss != NULL) { - oss->ss_sp = vg_scss.altstack.ss_sp; - oss->ss_size = vg_scss.altstack.ss_size; - oss->ss_flags = sas_ss_flags(m_esp); - } - - if (ss != NULL) { - if (on_sig_stack(VG_(threads)[tid].m_esp)) { - SET_EAX(tid, -VKI_EPERM); - return; - } - if (ss->ss_flags != VKI_SS_DISABLE - && ss->ss_flags != VKI_SS_ONSTACK - && ss->ss_flags != 0) { - SET_EAX(tid, -VKI_EINVAL); - return; - } - if (ss->ss_flags == VKI_SS_DISABLE) { - vg_scss.altstack.ss_size = 0; - vg_scss.altstack.ss_sp = NULL; - } else { - if (ss->ss_size < VKI_MINSIGSTKSZ) { - SET_EAX(tid, -VKI_ENOMEM); - return; - } - } - vg_scss.altstack.ss_sp = ss->ss_sp; - vg_scss.altstack.ss_size = ss->ss_size; - } - SET_EAX(tid, 0); -} - - -void VG_(do__NR_sigaction) ( ThreadId tid ) -{ - Int signo; - vki_ksigaction* new_act; - vki_ksigaction* old_act; - vki_ksigset_t irrelevant_sigmask; - - vg_assert(VG_(is_valid_tid)(tid)); - signo = VG_(threads)[tid].m_ebx; /* int sigNo */ - new_act = (vki_ksigaction*)(VG_(threads)[tid].m_ecx); - old_act = (vki_ksigaction*)(VG_(threads)[tid].m_edx); - - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugExtraMsg, - "__NR_sigaction: tid %d, sigNo %d, " - "new 0x%x, old 0x%x, new flags 0x%x", - tid, signo, (UInt)new_act, (UInt)old_act, - (UInt)(new_act ? new_act->ksa_flags : 0) ); - - /* Rule out various error conditions. The aim is to ensure that if - when the call is passed to the kernel it will definitely - succeed. */ - - /* Reject out-of-range signal numbers. 
*/ - if (signo < 1 || signo > VKI_KNSIG) goto bad_signo; - - /* Reject attempts to set a handler (or set ignore) for SIGKILL. */ - if ( (signo == VKI_SIGKILL || signo == VKI_SIGSTOP) - && new_act - && new_act->ksa_handler != VKI_SIG_DFL) - goto bad_sigkill_or_sigstop; - - /* If the client supplied non-NULL old_act, copy the relevant SCSS - entry into it. */ - if (old_act) { - old_act->ksa_handler = vg_scss.scss_per_sig[signo].scss_handler; - old_act->ksa_flags = vg_scss.scss_per_sig[signo].scss_flags; - old_act->ksa_mask = vg_scss.scss_per_sig[signo].scss_mask; - old_act->ksa_restorer = vg_scss.scss_per_sig[signo].scss_restorer; - } - - /* And now copy new SCSS entry from new_act. */ - if (new_act) { - vg_scss.scss_per_sig[signo].scss_handler = new_act->ksa_handler; - vg_scss.scss_per_sig[signo].scss_flags = new_act->ksa_flags; - vg_scss.scss_per_sig[signo].scss_mask = new_act->ksa_mask; - vg_scss.scss_per_sig[signo].scss_restorer = new_act->ksa_restorer; - } - - /* All happy bunnies ... */ - if (new_act) { - VG_(block_all_host_signals)( &irrelevant_sigmask ); - VG_(handle_SCSS_change)( False /* lazy update */ ); - } - SET_EAX(tid, 0); - return; - - bad_signo: - VG_(message)(Vg_UserMsg, - "Warning: bad signal number %d in __NR_sigaction.", - signo); - SET_EAX(tid, -VKI_EINVAL); - return; - - bad_sigkill_or_sigstop: - VG_(message)(Vg_UserMsg, - "Warning: attempt to set %s handler in __NR_sigaction.", - signo == VKI_SIGKILL ? 
"SIGKILL" : "SIGSTOP" ); - - SET_EAX(tid, -VKI_EINVAL); - return; -} - - -static -void do_sigprocmask_bitops ( Int vki_how, - vki_ksigset_t* orig_set, - vki_ksigset_t* modifier ) -{ - switch (vki_how) { - case VKI_SIG_BLOCK: - VG_(ksigaddset_from_set)( orig_set, modifier ); - break; - case VKI_SIG_UNBLOCK: - VG_(ksigdelset_from_set)( orig_set, modifier ); - break; - case VKI_SIG_SETMASK: - *orig_set = *modifier; - break; - default: - VG_(panic)("do_sigprocmask_bitops"); - break; - } -} - -/* Handle blocking mask set/get uniformly for threads and process as a - whole. If tid==VG_INVALID_THREADID, this is really - __NR_sigprocmask, in which case we set the masks for all threads to - the "set" and return in "oldset" that from the root thread (1). - Otherwise, tid will denote a valid thread, in which case we just - set/get its mask. - - Note that the thread signal masks are an implicit part of SCSS, - which is why this routine is allowed to mess with them. -*/ -static -void do_setmask ( ThreadId tid, - Int how, - vki_ksigset_t* newset, - vki_ksigset_t* oldset ) -{ - vki_ksigset_t irrelevant_sigmask; - - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugExtraMsg, - "do_setmask: tid = %d (%d means ALL), how = %d (%s), set = %p", - tid, - VG_INVALID_THREADID, - how, - how==VKI_SIG_BLOCK ? "SIG_BLOCK" : ( - how==VKI_SIG_UNBLOCK ? "SIG_UNBLOCK" : ( - how==VKI_SIG_SETMASK ? "SIG_SETMASK" : "???")), - newset - ); - - if (tid == VG_INVALID_THREADID) { - /* Behave as if __NR_sigprocmask. */ - if (oldset) { - /* A bit fragile. Should do better here really. */ - vg_assert(VG_(threads)[1].status != VgTs_Empty); - *oldset = VG_(threads)[1].sig_mask; - } - if (newset) { - ThreadId tidd; - for (tidd = 1; tidd < VG_N_THREADS; tidd++) { - if (VG_(threads)[tidd].status == VgTs_Empty) - continue; - do_sigprocmask_bitops ( - how, &VG_(threads)[tidd].sig_mask, newset ); - } - } - } else { - /* Just do this thread. 
*/ - vg_assert(VG_(is_valid_tid)(tid)); - if (oldset) - *oldset = VG_(threads)[tid].sig_mask; - if (newset) - do_sigprocmask_bitops ( - how, &VG_(threads)[tid].sig_mask, newset ); - } - - if (newset) { - VG_(block_all_host_signals)( &irrelevant_sigmask ); - VG_(handle_SCSS_change)( False /* lazy update */ ); - } -} - - -void VG_(do__NR_sigprocmask) ( ThreadId tid, - Int how, - vki_ksigset_t* set, - vki_ksigset_t* oldset ) -{ - if (how == VKI_SIG_BLOCK || how == VKI_SIG_UNBLOCK - || how == VKI_SIG_SETMASK) { - vg_assert(VG_(is_valid_tid)(tid)); - do_setmask ( VG_INVALID_THREADID, how, set, oldset ); - /* Syscall returns 0 (success) to its thread. */ - SET_EAX(tid, 0); - } else { - VG_(message)(Vg_DebugMsg, - "sigprocmask: unknown `how' field %d", how); - SET_EAX(tid, -VKI_EINVAL); - } -} - - -void VG_(do_pthread_sigmask_SCSS_upd) ( ThreadId tid, - Int how, - vki_ksigset_t* set, - vki_ksigset_t* oldset ) -{ - /* Assume that how has been validated by caller. */ - vg_assert(how == VKI_SIG_BLOCK || how == VKI_SIG_UNBLOCK - || how == VKI_SIG_SETMASK); - vg_assert(VG_(is_valid_tid)(tid)); - do_setmask ( tid, how, set, oldset ); - /* The request return code is set in do_pthread_sigmask */ -} - - -void VG_(send_signal_to_thread) ( ThreadId thread, Int sig ) -{ - Int res; - vg_assert(VG_(is_valid_tid)(thread)); - vg_assert(sig >= 1 && sig <= VKI_KNSIG); - - switch ((UInt)(vg_scss.scss_per_sig[sig].scss_handler)) { - - case ((UInt)VKI_SIG_IGN): - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "send_signal %d to_thread %d: IGN, ignored", sig, thread ); - break; - - case ((UInt)VKI_SIG_DFL): - /* This is the tricky case. Since we don't handle default - actions, the simple thing is to send someone round to the - front door and signal there. Then the kernel will do - whatever it does with the default action. 
*/ - res = VG_(kill)( VG_(getpid)(), sig ); - vg_assert(res == 0); - break; - - default: - if (!vg_dcss.dcss_sigpending[sig]) { - vg_dcss.dcss_sigpending[sig] = True; - vg_dcss.dcss_destthread[sig] = thread; - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "send_signal %d to_thread %d: now pending", sig, thread ); - } else { - if (vg_dcss.dcss_destthread[sig] == thread) { - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "send_signal %d to_thread %d: already pending ... " - "discarded", sig, thread ); - } else { - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "send_signal %d to_thread %d: was pending for %d, " - "now pending for %d", - sig, thread, vg_dcss.dcss_destthread[sig], thread ); - vg_dcss.dcss_destthread[sig] = thread; - } - } - } -} - - -/* Store in set the signals which could be delivered to this thread - right now (since they are pending) but cannot be, because the - thread has masked them out. */ -void VG_(do_sigpending) ( ThreadId tid, vki_ksigset_t* set ) -{ - Int sig, res; - Bool maybe_pend; - vki_ksigset_t process_pending; - - /* Get the set of signals which are pending for the process as a - whole. */ - res = VG_(sigpending)( &process_pending ); - vg_assert(res == 0); - - VG_(ksigemptyset)(set); - for (sig = 1; sig <= VKI_KNSIG; sig++) { - - /* Figure out if the signal could be pending for this thread. - There are two cases. */ - maybe_pend = False; - - /* Case 1: perhaps the signal is pending for the process as a - whole -- that is, is blocked even valgrind's signal - handler. */ - if (VG_(ksigismember)( &process_pending, sig )) - maybe_pend = True; - - /* Case 2: the signal has been collected by our handler and is - now awaiting disposition inside valgrind. */ - if (/* is it pending at all? 
*/ - vg_dcss.dcss_sigpending[sig] - && - /* check it is not specifically directed to some other thread */ - (vg_dcss.dcss_destthread[sig] == VG_INVALID_THREADID - || vg_dcss.dcss_destthread[sig] == tid) - ) - maybe_pend = True; - - if (!maybe_pend) - continue; /* this signal just ain't pending! */ - - /* Check other necessary conditions now ... */ - - if (VG_(ksigismember)( &VG_(threads)[tid].sigs_waited_for, sig )) - continue; /* tid is sigwaiting for sig, so will never be - offered to a handler */ - if (! VG_(ksigismember)( &VG_(threads)[tid].sig_mask, sig )) - continue; /* not blocked in this thread */ - - /* Ok, sig could be delivered to this thread if only it wasn't - masked out. So we add it to set. */ - VG_(ksigaddset)( set, sig ); - } -} - - -/* --------------------------------------------------------------------- - LOW LEVEL STUFF TO DO WITH SIGNALS: IMPLEMENTATION - ------------------------------------------------------------------ */ - -/* --------------------------------------------------------------------- - Handy utilities to block/restore all host signals. - ------------------------------------------------------------------ */ - -/* Block all host signals, dumping the old mask in *saved_mask. */ -void VG_(block_all_host_signals) ( /* OUT */ vki_ksigset_t* saved_mask ) -{ - Int ret; - vki_ksigset_t block_procmask; - VG_(ksigfillset)(&block_procmask); - ret = VG_(ksigprocmask) - (VKI_SIG_SETMASK, &block_procmask, saved_mask); - vg_assert(ret == 0); -} - -/* Restore the blocking mask using the supplied saved one. */ -void VG_(restore_all_host_signals) ( /* IN */ vki_ksigset_t* saved_mask ) -{ - Int ret; - ret = VG_(ksigprocmask)(VKI_SIG_SETMASK, saved_mask, NULL); - vg_assert(ret == 0); -} - - -/* --------------------------------------------------------------------- - The signal simulation proper. A simplified version of what the - Linux kernel does. 
- ------------------------------------------------------------------ */ - -/* A structure in which to save the application's registers - during the execution of signal handlers. */ - -typedef - struct { - /* These are parameters to the signal handler. */ - UInt retaddr; /* Sig handler's (bogus) return address */ - Int sigNo; /* The arg to the sig handler. */ - Addr psigInfo; /* ptr to siginfo_t; NULL for now. */ - Addr puContext; /* ptr to ucontext; NULL for now. */ - /* Sanity check word. */ - UInt magicPI; - /* Saved processor state. */ - UInt fpustate[VG_SIZE_OF_FPUSTATE_W]; - UInt eax; - UInt ecx; - UInt edx; - UInt ebx; - UInt ebp; - UInt esp; - UInt esi; - UInt edi; - Addr eip; - UInt eflags; - /* Scheduler-private stuff: what was the thread's status prior to - delivering this signal? */ - ThreadStatus status; - /* Sanity check word. Is the highest-addressed word; do not - move!*/ - UInt magicE; - } - VgSigFrame; - - - -/* Set up a stack frame (VgSigContext) for the client's signal - handler. This includes the signal number and a bogus return - address. */ -static -void vg_push_signal_frame ( ThreadId tid, int sigNo ) -{ - Int i; - Addr esp, esp_top_of_frame; - VgSigFrame* frame; - ThreadState* tst; - - vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG); - vg_assert(VG_(is_valid_tid)(tid)); - tst = & VG_(threads)[tid]; - - if (/* this signal asked to run on an alt stack */ - (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_ONSTACK) - && /* there is a defined and enabled alt stack, which we're not - already using. Logic from get_sigframe in - arch/i386/kernel/signal.c. 
*/ - sas_ss_flags(tst->m_esp) == 0 - ) { - esp_top_of_frame - = (Addr)(vg_scss.altstack.ss_sp) + vg_scss.altstack.ss_size; - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "delivering signal %d to thread %d: on ALT STACK", - sigNo, tid ); - } else { - esp_top_of_frame = tst->m_esp; - } - - esp = esp_top_of_frame; - esp -= sizeof(VgSigFrame); - frame = (VgSigFrame*)esp; - /* Assert that the frame is placed correctly. */ - vg_assert( (sizeof(VgSigFrame) & 0x3) == 0 ); - vg_assert( ((Char*)(&frame->magicE)) + sizeof(UInt) - == ((Char*)(esp_top_of_frame)) ); - - frame->retaddr = (UInt)(&VG_(signalreturn_bogusRA)); - frame->sigNo = sigNo; - frame->psigInfo = (Addr)NULL; - frame->puContext = (Addr)NULL; - frame->magicPI = 0x31415927; - - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - frame->fpustate[i] = tst->m_fpu[i]; - - frame->eax = tst->m_eax; - frame->ecx = tst->m_ecx; - frame->edx = tst->m_edx; - frame->ebx = tst->m_ebx; - frame->ebp = tst->m_ebp; - frame->esp = tst->m_esp; - frame->esi = tst->m_esi; - frame->edi = tst->m_edi; - frame->eip = tst->m_eip; - frame->eflags = tst->m_eflags; - - frame->status = tst->status; - - frame->magicE = 0x27182818; - - /* Set the thread so it will next run the handler. */ - tst->m_esp = esp; - tst->m_eip = (Addr)vg_scss.scss_per_sig[sigNo].scss_handler; - /* This thread needs to be marked runnable, but we leave that the - caller to do. */ - - /* Make retaddr, sigNo, psigInfo, puContext fields readable -- at - 0(%ESP) .. 12(%ESP) */ - if (VG_(clo_instrument)) { - VGM_(make_readable) ( ((Addr)esp)+0, 4 ); - VGM_(make_readable) ( ((Addr)esp)+4, 4 ); - VGM_(make_readable) ( ((Addr)esp)+8, 4 ); - VGM_(make_readable) ( ((Addr)esp)+12, 4 ); - } - - /* - VG_(printf)("pushed signal frame; %%ESP now = %p, next %%EBP = %p\n", - esp, tst->m_eip); - */ -} - - -/* Clear the signal frame created by vg_push_signal_frame, restore the - simulated machine state, and return the signal number that the - frame was for. 
*/ -static -Int vg_pop_signal_frame ( ThreadId tid ) -{ - Addr esp; - Int sigNo, i; - VgSigFrame* frame; - ThreadState* tst; - - vg_assert(VG_(is_valid_tid)(tid)); - tst = & VG_(threads)[tid]; - - /* Correctly reestablish the frame base address. */ - esp = tst->m_esp; - frame = (VgSigFrame*) - (esp -4 /* because the handler's RET pops the RA */ - +20 /* because signalreturn_bogusRA pushes 5 words */); - - vg_assert(frame->magicPI == 0x31415927); - vg_assert(frame->magicE == 0x27182818); - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "vg_pop_signal_frame (thread %d): valid magic", tid); - - /* restore machine state */ - for (i = 0; i < VG_SIZE_OF_FPUSTATE_W; i++) - tst->m_fpu[i] = frame->fpustate[i]; - - /* Mark the frame structure as nonaccessible. */ - if (VG_(clo_instrument)) - VGM_(make_noaccess)( (Addr)frame, sizeof(VgSigFrame) ); - - /* Restore machine state from the saved context. */ - tst->m_eax = frame->eax; - tst->m_ecx = frame->ecx; - tst->m_edx = frame->edx; - tst->m_ebx = frame->ebx; - tst->m_ebp = frame->ebp; - tst->m_esp = frame->esp; - tst->m_esi = frame->esi; - tst->m_edi = frame->edi; - tst->m_eflags = frame->eflags; - tst->m_eip = frame->eip; - sigNo = frame->sigNo; - - /* And restore the thread's status to what it was before the signal - was delivered. */ - tst->status = frame->status; - - return sigNo; -} - - -/* A handler is returning. Restore the machine state from the stacked - VgSigContext and continue with whatever was going on before the - handler ran. Returns the SA_RESTART syscall-restartability-status - of the delivered signal. */ - -Bool VG_(signal_returns) ( ThreadId tid ) -{ - Int sigNo; - vki_ksigset_t saved_procmask; - - /* Block host signals ... */ - VG_(block_all_host_signals)( &saved_procmask ); - - /* Pop the signal frame and restore tid's status to what it was - before the signal was delivered. */ - sigNo = vg_pop_signal_frame(tid); - - vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG); - - /* Unlock and return. 
*/ - VG_(restore_all_host_signals)( &saved_procmask ); - - /* Scheduler now can resume this thread, or perhaps some other. - Tell the scheduler whether or not any syscall interrupted by - this signal should be restarted, if possible, or no. */ - return - (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_RESTART) - ? True - : False; -} - - -/* Deliver all pending signals, by building stack frames for their - handlers. Return True if any signals were delivered. */ -Bool VG_(deliver_signals) ( void ) -{ - vki_ksigset_t saved_procmask; - Int sigNo; - Bool found, scss_changed; - ThreadState* tst; - ThreadId tid; - - /* A cheap check. We don't need to have exclusive access to the - pending array, because in the worst case, vg_oursignalhandler - will add signals, causing us to return, thinking there are no - signals to deliver, when in fact there are some. A subsequent - call here will handle the signal(s) we missed. */ - found = False; - for (sigNo = 1; sigNo <= VKI_KNSIG; sigNo++) - if (vg_dcss.dcss_sigpending[sigNo]) - found = True; - - if (!found) return False; - - /* Now we have to do it properly. Get exclusive access by - blocking all the host's signals. That means vg_oursignalhandler - can't run whilst we are messing with stuff. - */ - scss_changed = False; - VG_(block_all_host_signals)( &saved_procmask ); - - /* Look for signals to deliver ... */ - for (sigNo = 1; sigNo <= VKI_KNSIG; sigNo++) { - - if (!vg_dcss.dcss_sigpending[sigNo]) - continue; - - /* sigNo is pending. Try to find a suitable thread to deliver - it to. */ - /* First off, are any threads in sigwait() for the signal? - If so just give to one of them and have done. */ - for (tid = 1; tid < VG_N_THREADS; tid++) { - tst = & VG_(threads)[tid]; - /* Is tid waiting for a signal? If not, ignore. */ - if (tst->status != VgTs_WaitSIG) - continue; - /* Is the signal directed at a specific thread other than - this one? If yes, ignore. 
*/ - if (vg_dcss.dcss_destthread[sigNo] != VG_INVALID_THREADID - && vg_dcss.dcss_destthread[sigNo] != tid) - continue; - /* Is tid waiting for the signal? If not, ignore. */ - if (VG_(ksigismember)(&(tst->sigs_waited_for), sigNo)) - break; - } - if (tid < VG_N_THREADS) { - UInt* sigwait_args; - tst = & VG_(threads)[tid]; - if (VG_(clo_trace_signals) || VG_(clo_trace_sched)) - VG_(message)(Vg_DebugMsg, - "releasing thread %d from sigwait() due to signal %d", - tid, sigNo ); - sigwait_args = (UInt*)(tst->m_eax); - if (NULL != (UInt*)(sigwait_args[2])) { - *(Int*)(sigwait_args[2]) = sigNo; - if (VG_(clo_instrument)) - VGM_(make_readable)( (Addr)(sigwait_args[2]), - sizeof(UInt)); - } - SET_EDX(tid, 0); - tst->status = VgTs_Runnable; - VG_(ksigemptyset)(&tst->sigs_waited_for); - scss_changed = True; - vg_dcss.dcss_sigpending[sigNo] = False; - vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; - /*paranoia*/ - continue; /* for (sigNo = 1; ...) loop */ - } - - /* Well, nobody appears to be sigwaiting for it. So we really - are delivering the signal in the usual way. And that the - client really has a handler for this thread! */ - vg_assert(vg_dcss.dcss_sigpending[sigNo]); - - /* A recent addition, so as to stop seriously wierd progs dying - at the following assertion (which this renders redundant, - btw). */ - if (vg_scss.scss_per_sig[sigNo].scss_handler == VKI_SIG_IGN - || vg_scss.scss_per_sig[sigNo].scss_handler == VKI_SIG_DFL) { - /* Strange; perhaps the handler disappeared before we could - deliver the signal. */ - VG_(message)(Vg_DebugMsg, - "discarding signal %d for thread %d because handler missing", - sigNo, tid ); - vg_dcss.dcss_sigpending[sigNo] = False; - vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; - continue; /* for (sigNo = 1; ...) 
loop */ - } - - vg_assert(vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_IGN - && vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_DFL); - - tid = vg_dcss.dcss_destthread[sigNo]; - vg_assert(tid == VG_INVALID_THREADID - || VG_(is_valid_tid)(tid)); - - if (tid != VG_INVALID_THREADID) { - /* directed to a specific thread; ensure it actually still - exists ... */ - tst = & VG_(threads)[tid]; - if (tst->status == VgTs_Empty) { - /* dead, for whatever reason; ignore this signal */ - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg, - "discarding signal %d for nonexistent thread %d", - sigNo, tid ); - vg_dcss.dcss_sigpending[sigNo] = False; - vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; - continue; /* for (sigNo = 1; ...) loop */ - } - } else { - /* not directed to a specific thread, so search for a - suitable candidate */ - for (tid = 1; tid < VG_N_THREADS; tid++) { - tst = & VG_(threads)[tid]; - if (tst->status != VgTs_Empty - && !VG_(ksigismember)(&(tst->sig_mask), sigNo)) - break; - } - if (tid == VG_N_THREADS) - /* All threads have this signal blocked, so we can't - deliver it just now */ - continue; /* for (sigNo = 1; ...) loop */ - } - - /* Ok, we can deliver signal sigNo to thread tid. */ - - if (VG_(clo_trace_signals)) - VG_(message)(Vg_DebugMsg,"delivering signal %d to thread %d", - sigNo, tid ); - - /* Create a signal delivery frame, and set the client's %ESP and - %EIP so that when execution continues, we will enter the - signal handler with the frame on top of the client's stack, - as it expects. */ - vg_assert(VG_(is_valid_tid)(tid)); - vg_push_signal_frame ( tid, sigNo ); - VG_(threads)[tid].status = VgTs_Runnable; - - /* Signify that the signal has been delivered. */ - vg_dcss.dcss_sigpending[sigNo] = False; - vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; - - if (vg_scss.scss_per_sig[sigNo].scss_flags & VKI_SA_ONESHOT) { - /* Do the ONESHOT thing. 
*/ - vg_scss.scss_per_sig[sigNo].scss_handler = VKI_SIG_DFL; - scss_changed = True; - } - } - - /* Unlock and return. */ - if (scss_changed) { - /* handle_SCSS_change computes a new kernel blocking mask and - applies that. */ - VG_(handle_SCSS_change)( False /* lazy update */ ); - } else { - /* No SCSS change, so just restore the existing blocking - mask. */ - VG_(restore_all_host_signals)( &saved_procmask ); - } - - return True; -} - - -/* Receive a signal from the host, and either discard it or park it in - the queue of pending signals. All other signals will be blocked - when this handler runs. Runs with all host signals blocked, so as - to have mutual exclusion when adding stuff to the queue. */ - -static -void vg_oursignalhandler ( Int sigNo ) -{ - static UInt segv_warns = 0; - ThreadId tid; - Int dummy_local; - Bool sane; - vki_ksigset_t saved_procmask; - - /* - if (sigNo == VKI_SIGUSR1) { - VG_(printf)("YOWZA! SIGUSR1\n\n"); - VG_(clo_trace_pthread_level) = 2; - VG_(clo_trace_sched) = True; - VG_(clo_trace_syscalls) = True; - VG_(clo_trace_signals) = True; - return; - } - */ - - if (VG_(clo_trace_signals)) { - VG_(start_msg)(Vg_DebugMsg); - VG_(add_to_msg)("signal %d arrived ... ", sigNo ); - } - vg_assert(sigNo >= 1 && sigNo <= VKI_KNSIG); - - /* Sanity check. Ensure we're really running on the signal stack - we asked for. */ - if ( !( - ((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&dummy_local)) - && - ((Char*)(&dummy_local) < (Char*)(&(VG_(sigstack)[10000]))) - ) - ) { - VG_(message)(Vg_DebugMsg, - "FATAL: signal delivered on the wrong stack?!"); - VG_(message)(Vg_DebugMsg, - "A possible workaround follows. 
Please tell me"); - VG_(message)(Vg_DebugMsg, - "(jseward@acm.org) if the suggested workaround doesn't help."); - VG_(unimplemented) - ("support for progs compiled with -p/-pg; " - "rebuild your prog without -p/-pg"); - } - - vg_assert((Char*)(&(VG_(sigstack)[0])) <= (Char*)(&dummy_local)); - vg_assert((Char*)(&dummy_local) < (Char*)(&(VG_(sigstack)[10000]))); - - VG_(block_all_host_signals)( &saved_procmask ); - - /* This is a sanity check. Either a signal has arrived because the - client set a handler for it, or because some thread sigwaited on - it. Establish that at least one of these is the case. */ - sane = False; - if (vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_DFL - && vg_scss.scss_per_sig[sigNo].scss_handler != VKI_SIG_IGN) { - sane = True; - } else { - for (tid = 1; tid < VG_N_THREADS; tid++) { - if (VG_(threads)[tid].status != VgTs_WaitSIG) - continue; - if (VG_(ksigismember)(&VG_(threads)[tid].sigs_waited_for, sigNo)) - sane = True; - } - } - if (!sane) { - if (VG_(clo_trace_signals)) { - VG_(add_to_msg)("unexpected!"); - VG_(end_msg)(); - } - /* Note: we panic with all signals blocked here. Don't think - that matters. */ - VG_(panic)("vg_oursignalhandler: unexpected signal"); - } - /* End of the sanity check. */ - - /* Decide what to do with it. */ - if (vg_dcss.dcss_sigpending[sigNo]) { - /* pending; ignore it. */ - if (VG_(clo_trace_signals)) { - VG_(add_to_msg)("already pending; discarded" ); - VG_(end_msg)(); - } - } else { - /* Ok, we'd better deliver it to the client. */ - /* Queue it up for delivery at some point in the future. */ - vg_dcss.dcss_sigpending[sigNo] = True; - vg_dcss.dcss_destthread[sigNo] = VG_INVALID_THREADID; - if (VG_(clo_trace_signals)) { - VG_(add_to_msg)("queued" ); - VG_(end_msg)(); - } - } - - /* We've finished messing with the queue, so re-enable host - signals. 
*/ - VG_(restore_all_host_signals)( &saved_procmask ); - - if ( (sigNo == VKI_SIGSEGV || sigNo == VKI_SIGBUS - || sigNo == VKI_SIGFPE || sigNo == VKI_SIGILL) - && - VG_(scheduler_jmpbuf_valid) - ) { - /* Can't continue; must longjmp back to the scheduler and thus - enter the sighandler immediately. */ - VG_(longjmpd_on_signal) = sigNo; - __builtin_longjmp(VG_(scheduler_jmpbuf),1); - } - - if (sigNo == VKI_SIGSEGV && !VG_(scheduler_jmpbuf_valid)) { - if (++segv_warns <= 3) { - VG_(message)(Vg_UserMsg, - "Warning: SIGSEGV not in user code; either from syscall kill()" ); - VG_(message)(Vg_UserMsg, - " or possible Valgrind bug. " - "This message is only shown 3 times." ); - } - } -} - - -/* The outer insn loop calls here to reenable a host signal if - vg_oursighandler longjmp'd. -*/ -void VG_(unblock_host_signal) ( Int sigNo ) -{ - Int ret; - vki_ksigset_t set; - VG_(ksigemptyset)(&set); - ret = VG_(ksigaddset)(&set,sigNo); - vg_assert(ret == 0); - ret = VG_(ksigprocmask)(VKI_SIG_UNBLOCK,&set,NULL); - vg_assert(ret == 0); -} - - -static __attribute((unused)) -void pp_vg_ksigaction ( vki_ksigaction* sa ) -{ - Int i; - VG_(printf)("vg_ksigaction: handler %p, flags 0x%x, restorer %p\n", - sa->ksa_handler, (UInt)sa->ksa_flags, sa->ksa_restorer); - VG_(printf)("vg_ksigaction: { "); - for (i = 1; i <= VKI_KNSIG; i++) - if (VG_(ksigismember(&(sa->ksa_mask),i))) - VG_(printf)("%d ", i); - VG_(printf)("}\n"); -} - - -/* At startup, copy the process' real signal state to the SCSS. - Whilst doing this, block all real signals. Then calculate SKSS and - set the kernel to that. Also initialise DCSS. -*/ -void VG_(sigstartup_actions) ( void ) -{ - Int i, ret; - - vki_ksigset_t saved_procmask; - vki_kstack_t altstack_info; - vki_ksigaction sa; - - /* VG_(printf)("SIGSTARTUP\n"); */ - /* Block all signals. - saved_procmask remembers the previous mask. */ - VG_(block_all_host_signals)( &saved_procmask ); - - /* Copy per-signal settings to SCSS. 
*/ - for (i = 1; i <= VKI_KNSIG; i++) { - - /* Get the old host action */ - ret = VG_(ksigaction)(i, NULL, &sa); - vg_assert(ret == 0); - - if (VG_(clo_trace_signals)) - VG_(printf)("snaffling handler 0x%x for signal %d\n", - (Addr)(sa.ksa_handler), i ); - - vg_scss.scss_per_sig[i].scss_handler = sa.ksa_handler; - vg_scss.scss_per_sig[i].scss_flags = sa.ksa_flags; - vg_scss.scss_per_sig[i].scss_mask = sa.ksa_mask; - vg_scss.scss_per_sig[i].scss_restorer = sa.ksa_restorer; - } - - /* Copy the alt stack, if any. */ - ret = VG_(ksigaltstack)(NULL, &vg_scss.altstack); - vg_assert(ret == 0); - - /* Copy the process' signal mask into the root thread. */ - vg_assert(VG_(threads)[1].status == VgTs_Runnable); - VG_(threads)[1].sig_mask = saved_procmask; - - /* Initialise DCSS. */ - for (i = 1; i <= VKI_KNSIG; i++) { - vg_dcss.dcss_sigpending[i] = False; - vg_dcss.dcss_destthread[i] = VG_INVALID_THREADID; - } - - /* Register an alternative stack for our own signal handler to run - on. */ - altstack_info.ss_sp = &(VG_(sigstack)[0]); - altstack_info.ss_size = 10000 * sizeof(UInt); - altstack_info.ss_flags = 0; - ret = VG_(ksigaltstack)(&altstack_info, NULL); - if (ret != 0) { - VG_(panic)( - "vg_sigstartup_actions: couldn't install alternative sigstack"); - } - if (VG_(clo_trace_signals)) { - VG_(message)(Vg_DebugExtraMsg, - "vg_sigstartup_actions: sigstack installed ok"); - } - - /* DEBUGGING HACK */ - /* VG_(ksignal)(VKI_SIGUSR1, &VG_(oursignalhandler)); */ - - /* Calculate SKSS and apply it. This also sets the initial kernel - mask we need to run with. */ - VG_(handle_SCSS_change)( True /* forced update */ ); -} - - -/* Copy the process' sim signal state to the real state, - for when we transfer from the simulated to real CPU. - PROBLEM: what if we're running a signal handler when we - get here? Hmm. - I guess we wind up in vg_signalreturn_bogusRA, *or* the - handler has done/will do a longjmp, in which case we're ok. 
- - It is important (see vg_startup.S) that this proc does not - change the state of the real FPU, since it is called when - running the program on the real CPU. -*/ -void VG_(sigshutdown_actions) ( void ) -{ - Int i, ret; - - vki_ksigset_t saved_procmask; - vki_ksigaction sa; - - VG_(block_all_host_signals)( &saved_procmask ); - - /* Copy per-signal settings from SCSS. */ - for (i = 1; i <= VKI_KNSIG; i++) { - - sa.ksa_handler = vg_scss.scss_per_sig[i].scss_handler; - sa.ksa_flags = vg_scss.scss_per_sig[i].scss_flags; - sa.ksa_mask = vg_scss.scss_per_sig[i].scss_mask; - sa.ksa_restorer = vg_scss.scss_per_sig[i].scss_restorer; - - if (VG_(clo_trace_signals)) - VG_(printf)("restoring handler 0x%x for signal %d\n", - (Addr)(sa.ksa_handler), i ); - - /* Set the old host action */ - ret = VG_(ksigaction)(i, &sa, NULL); - if (i != VKI_SIGKILL && i != VKI_SIGSTOP) - vg_assert(ret == 0); - } - - /* Restore the sig alt stack. */ - ret = VG_(ksigaltstack)(&vg_scss.altstack, NULL); - vg_assert(ret == 0); - - /* A bit of a kludge -- set the sigmask to that of the root - thread. */ - vg_assert(VG_(threads)[1].status != VgTs_Empty); - VG_(restore_all_host_signals)( &VG_(threads)[1].sig_mask ); -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_signals.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_startup.S b/coregrind/vg_startup.S deleted file mode 100644 index 63ee590153..0000000000 --- a/coregrind/vg_startup.S +++ /dev/null @@ -1,233 +0,0 @@ - -##--------------------------------------------------------------------## -##--- Startup and shutdown code for Valgrind. ---## -##--- vg_startup.S ---## -##--------------------------------------------------------------------## - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. 
- - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_constants.h" - - -#--------------------------------------------------------------------- -# -# Startup and shutdown code for Valgrind. Particularly hairy. -# -# The dynamic linker, ld.so, will run the contents of the .init -# section, once it has located, mmap-d and and linked the shared -# libraries needed by the program. Valgrind is itself a shared -# library. ld.so then runs code in the .init sections of each -# library in turn, in order to give them a chance to initialise -# themselves. We hijack this mechanism. Our startup routine -# does return -- and execution continues -- except on the -# synthetic CPU, not the real one. But ld.so, and the program -# it is starting, cant tell the difference. -# -# The management apologise for the lack of apostrophes in these -# comments. GNU as seems to object to them, for some reason. 
- - -.section .init - call VG_(startup) -.section .fini - call VG_(shutdown) - -.section .data -valgrind_already_initted: - .word 0 - -.section .text - - -.global VG_(startup) -VG_(startup): - cmpl $0, valgrind_already_initted - je really_start_up - ret - -really_start_up: - movl $1, valgrind_already_initted - - # Record %esp as it was when we got here. This is because argv/c - # and envp[] are passed as args to this function, and we need to see - # envp so we can get at the env var VG_ARGS without help from libc. - # The stack layout at this point depends on the version of glibc in - # use. See process_cmd_line_options() in vg_main.c for details. - movl %esp, VG_(esp_at_startup) - - # We have control! Save the state of the machine in - # the simulators state, and switch stacks. - # Except ... we cant copy the machines registers into their - # final places in vg_baseBlock, because the offsets to them - # have not yet been set up. Instead, they are copied to a - # temporary place (m_state_static). In vg_main.c, once the - # baseBlock offsets are set up, values are copied into baseBlock. - movl %eax, VG_(m_state_static)+0 - movl %ecx, VG_(m_state_static)+4 - movl %edx, VG_(m_state_static)+8 - movl %ebx, VG_(m_state_static)+12 - movl %esp, VG_(m_state_static)+16 - movl %ebp, VG_(m_state_static)+20 - movl %esi, VG_(m_state_static)+24 - movl %edi, VG_(m_state_static)+28 - pushfl - popl %eax - movl %eax, VG_(m_state_static)+32 - fwait - fnsave VG_(m_state_static)+40 - frstor VG_(m_state_static)+40 - - # keep the first and last 10 words free to check for overruns - movl $VG_(stack)+39996 -40, %esp - - # Now some real magic. We need this procedure to return, - # since thats what ld.so expects, but running on the - # simulator. So vg_main starts the simulator running at - # the insn labelled first_insn_to_simulate. - - movl $first_insn_to_simulate, VG_(m_state_static)+36 - jmp VG_(main) -first_insn_to_simulate: - # Nothing else to do -- just return in the "normal" way. 
- ret - - - -VG_(shutdown): - # Just return, and ignore any attempt by ld.so to call - # valgrind.sos exit function. We just run the client all - # the way to the final exit() syscall. This sidesteps - # problems caused by ld.so calling the finalisation code - # of other .sos *after* it shuts down valgrind, which - # was causing big problems with threads. - ret - - - -.global VG_(switch_to_real_CPU) -VG_(switch_to_real_CPU): - # Once Valgrind has decided it needs to exit, - # because the specified number of insns have been completed - # during a debugging run, it jumps here, which copies the - # simulators state into the real machine state. Execution - # of the rest of the program continues on the real CPU, - # and there is no way for the simulator to regain control - # after this point. - frstor VG_(m_state_static)+40 - movl VG_(m_state_static)+32, %eax - pushl %eax - popfl - movl VG_(m_state_static)+0, %eax - movl VG_(m_state_static)+4, %ecx - movl VG_(m_state_static)+8, %edx - movl VG_(m_state_static)+12, %ebx - movl VG_(m_state_static)+16, %esp - movl VG_(m_state_static)+20, %ebp - movl VG_(m_state_static)+24, %esi - movl VG_(m_state_static)+28, %edi - - pushal - pushfl - # We hope that vg_sigshutdown_actions does not alter - # the FPU state. - call VG_(sigshutdown_actions) - popfl - popal - # re-restore the FPU state anyway ... - frstor VG_(m_state_static)+40 - jmp *VG_(m_state_static)+36 - - - -/*------------------------------------------------------------*/ -/*--- A function to temporarily copy %ESP/%EBP into ---*/ -/*--- %esp/%ebp and then start up GDB. ---*/ -/*------------------------------------------------------------*/ - -/* -extern void VG_(swizzle_esp_then_start_GDB) ( Addr m_eip_at_error, - Addr m_esp_at_error, - Addr m_ebp_at_error ); -*/ - -/*--- This is clearly not re-entrant! 
---*/ -.data -vg_ebp_saved_over_GDB_start: - .long 0 -vg_esp_saved_over_GDB_start: - .long 0 -.text - -.global VG_(swizzle_esp_then_start_GDB) -VG_(swizzle_esp_then_start_GDB): - pushal - - # remember the simulators current stack/frame pointers - movl %ebp, vg_ebp_saved_over_GDB_start - movl %esp, vg_esp_saved_over_GDB_start - - # get args into regs - movl 44(%esp), %eax # client %EBP - movl 40(%esp), %ebx # client %ESP - movl 36(%esp), %ecx # client %EIP - - # Now that we dont need to refer to simulators stack any more, - # put %ESP into %esp - movl %ebx, %esp - - ### %esp now refers to clients stack - ### mess with the clients stack to make it look as if it - ### called this procedure, since otherwise it will look to gdb - ### as if the top (currently executing) stack frame of the - ### client is missing. - - # push %EIP. This is a faked-up return address. - pushl %ecx - - # push %EBP. This is a faked %ebp-chain pointer. - pushl %eax - - movl %esp, %ebp - - call VG_(start_GDB_whilst_on_client_stack) - - # restore the simulators stack/frame pointer - movl vg_ebp_saved_over_GDB_start, %ebp - movl vg_esp_saved_over_GDB_start, %esp - - popal - ret - -# gcc puts this construction at the end of every function. I think it -# allows the linker to figure out the size of the function. So we do -# the same, in the vague hope that it might help GDBs navigation. -.Lend_of_swizzle: - .size VG_(swizzle_esp_then_start_GDB), .Lend_of_swizzle-VG_(swizzle_esp_then_start_GDB) - - -##--------------------------------------------------------------------## -##--- end vg_startup.S ---## -##--------------------------------------------------------------------## diff --git a/coregrind/vg_symtab2.c b/coregrind/vg_symtab2.c deleted file mode 100644 index 8330794ee3..0000000000 --- a/coregrind/vg_symtab2.c +++ /dev/null @@ -1,2079 +0,0 @@ -/*--------------------------------------------------------------------*/ -/*--- Management of symbols and debugging information. 
---*/ -/*--- vg_symtab2.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - -#include /* ELF defns */ -#include /* stabs defns */ - - -/* Majorly rewritten Sun 3 Feb 02 to enable loading symbols from - dlopen()ed libraries, which is something that KDE3 does a lot. - - Stabs reader greatly improved by Nick Nethercode, Apr 02. - - 16 May 02: when notified about munmap, return a Bool indicating - whether or not the area being munmapped had executable permissions. - This is then used to determine whether or not - VG_(invalid_translations) should be called for that area. In order - that this work even if --instrument=no, in this case we still keep - track of the mapped executable segments, but do not load any debug - info or symbols. -*/ - -/*------------------------------------------------------------*/ -/*--- Structs n stuff ---*/ -/*------------------------------------------------------------*/ - -/* A structure to hold an ELF symbol (very crudely). 
*/ -typedef - struct { - Addr addr; /* lowest address of entity */ - UInt size; /* size in bytes */ - Int nmoff; /* offset of name in this SegInfo's str tab */ - } - RiSym; - -/* Line count at which overflow happens, due to line numbers being stored as - * shorts in `struct nlist' in a.out.h. */ -#define LINENO_OVERFLOW (1 << (sizeof(short) * 8)) - -#define LINENO_BITS 20 -#define LOC_SIZE_BITS (32 - LINENO_BITS) -#define MAX_LINENO ((1 << LINENO_BITS) - 1) - -/* Unlikely to have any lines with instruction ranges > 4096 bytes */ -#define MAX_LOC_SIZE ((1 << LOC_SIZE_BITS) - 1) - -/* Number used to detect line number overflows; if one line is 60000-odd - * smaller than the previous, is was probably an overflow. - */ -#define OVERFLOW_DIFFERENCE (LINENO_OVERFLOW - 5000) - -/* A structure to hold addr-to-source info for a single line. There can be a - * lot of these, hence the dense packing. */ -typedef - struct { - /* Word 1 */ - Addr addr; /* lowest address for this line */ - /* Word 2 */ - UShort size:LOC_SIZE_BITS; /* byte size; we catch overflows of this */ - UInt lineno:LINENO_BITS; /* source line number, or zero */ - /* Word 3 */ - UInt fnmoff; /* source filename; offset in this - SegInfo's str tab */ - } - RiLoc; - - -/* A structure which contains information pertaining to one mapped - text segment. */ -typedef - struct _SegInfo { - struct _SegInfo* next; - /* Description of the mapped segment. */ - Addr start; - UInt size; - UChar* filename; /* in mallocville */ - UInt foffset; - /* An expandable array of symbols. */ - RiSym* symtab; - UInt symtab_used; - UInt symtab_size; - /* An expandable array of locations. */ - RiLoc* loctab; - UInt loctab_used; - UInt loctab_size; - /* An expandable array of characters -- the string table. */ - Char* strtab; - UInt strtab_used; - UInt strtab_size; - /* offset is what we need to add to symbol table entries - to get the real location of that symbol in memory. - For executables, offset is zero. 
- For .so's, offset == base_addr. - This seems like a giant kludge to me. - */ - UInt offset; - } - SegInfo; - - -/* -- debug helper -- */ -static void ppSegInfo ( SegInfo* si ) -{ - VG_(printf)("name: %s\n" - "start %p, size %d, foffset %d\n", - si->filename?si->filename : (UChar*)"NULL", - si->start, si->size, si->foffset ); -} - -static void freeSegInfo ( SegInfo* si ) -{ - vg_assert(si != NULL); - if (si->filename) VG_(free)(VG_AR_SYMTAB, si->filename); - if (si->symtab) VG_(free)(VG_AR_SYMTAB, si->symtab); - if (si->loctab) VG_(free)(VG_AR_SYMTAB, si->loctab); - if (si->strtab) VG_(free)(VG_AR_SYMTAB, si->strtab); - VG_(free)(VG_AR_SYMTAB, si); -} - - -/*------------------------------------------------------------*/ -/*--- Adding stuff ---*/ -/*------------------------------------------------------------*/ - -/* Add a str to the string table, including terminating zero, and - return offset of the string in vg_strtab. */ - -static __inline__ -Int addStr ( SegInfo* si, Char* str ) -{ - Char* new_tab; - Int new_sz, i, space_needed; - - space_needed = 1 + VG_(strlen)(str); - if (si->strtab_used + space_needed > si->strtab_size) { - new_sz = 2 * si->strtab_size; - if (new_sz == 0) new_sz = 5000; - new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz); - if (si->strtab != NULL) { - for (i = 0; i < si->strtab_used; i++) - new_tab[i] = si->strtab[i]; - VG_(free)(VG_AR_SYMTAB, si->strtab); - } - si->strtab = new_tab; - si->strtab_size = new_sz; - } - - for (i = 0; i < space_needed; i++) - si->strtab[si->strtab_used+i] = str[i]; - - si->strtab_used += space_needed; - vg_assert(si->strtab_used <= si->strtab_size); - return si->strtab_used - space_needed; -} - -/* Add a symbol to the symbol table. */ - -static __inline__ -void addSym ( SegInfo* si, RiSym* sym ) -{ - Int new_sz, i; - RiSym* new_tab; - - /* Ignore zero-sized syms. 
*/ - if (sym->size == 0) return; - - if (si->symtab_used == si->symtab_size) { - new_sz = 2 * si->symtab_size; - if (new_sz == 0) new_sz = 500; - new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiSym) ); - if (si->symtab != NULL) { - for (i = 0; i < si->symtab_used; i++) - new_tab[i] = si->symtab[i]; - VG_(free)(VG_AR_SYMTAB, si->symtab); - } - si->symtab = new_tab; - si->symtab_size = new_sz; - } - - si->symtab[si->symtab_used] = *sym; - si->symtab_used++; - vg_assert(si->symtab_used <= si->symtab_size); -} - -/* Add a location to the location table. */ - -static __inline__ -void addLoc ( SegInfo* si, RiLoc* loc ) -{ - Int new_sz, i; - RiLoc* new_tab; - - /* Zero-sized locs should have been ignored earlier */ - vg_assert(loc->size > 0); - - if (si->loctab_used == si->loctab_size) { - new_sz = 2 * si->loctab_size; - if (new_sz == 0) new_sz = 500; - new_tab = VG_(malloc)(VG_AR_SYMTAB, new_sz * sizeof(RiLoc) ); - if (si->loctab != NULL) { - for (i = 0; i < si->loctab_used; i++) - new_tab[i] = si->loctab[i]; - VG_(free)(VG_AR_SYMTAB, si->loctab); - } - si->loctab = new_tab; - si->loctab_size = new_sz; - } - - si->loctab[si->loctab_used] = *loc; - si->loctab_used++; - vg_assert(si->loctab_used <= si->loctab_size); -} - - -/* Top-level place to call to add a source-location mapping entry. */ - -static __inline__ -void addLineInfo ( SegInfo* si, - Int fnmoff, - Addr this, - Addr next, - Int lineno, - Int entry /* only needed for debug printing */ - ) -{ - RiLoc loc; - Int size = next - this; - - /* Ignore zero-sized locs */ - if (this == next) return; - - /* Maximum sanity checking. Some versions of GNU as do a shabby - * job with stabs entries; if anything looks suspicious, revert to - * a size of 1. 
This should catch the instruction of interest - * (since if using asm-level debug info, one instruction will - * correspond to one line, unlike with C-level debug info where - * multiple instructions can map to the one line), but avoid - * catching any other instructions bogusly. */ - if (this > next) { - VG_(message)(Vg_DebugMsg, - "warning: line info addresses out of order " - "at entry %d: 0x%x 0x%x", entry, this, next); - size = 1; - } - - if (size > MAX_LOC_SIZE) { - if (0) - VG_(message)(Vg_DebugMsg, - "warning: line info address range too large " - "at entry %d: %d", entry, size); - size = 1; - } - - /* vg_assert(this < si->start + si->size && next-1 >= si->start); */ - if (this >= si->start + si->size || next-1 < si->start) { - if (0) - VG_(message)(Vg_DebugMsg, - "warning: ignoring line info entry falling " - "outside current SegInfo: %p %p %p %p", - si->start, si->start + si->size, - this, next-1); - return; - } - - vg_assert(lineno >= 0); - if (lineno > MAX_LINENO) { - VG_(message)(Vg_UserMsg, - "warning: ignoring line info entry with " - "huge line number (%d)", lineno); - VG_(message)(Vg_UserMsg, - " Can't handle line numbers " - "greater than %d, sorry", MAX_LINENO); - return; - } - - loc.addr = this; - loc.size = (UShort)size; - loc.lineno = lineno; - loc.fnmoff = fnmoff; - addLoc ( si, &loc ); -} - - -/*------------------------------------------------------------*/ -/*--- Helpers ---*/ -/*------------------------------------------------------------*/ - -/* Non-fatal -- use vg_panic if terminal. */ -static -void vg_symerr ( Char* msg ) -{ - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg,"%s", msg ); -} - - -/* Print a symbol. */ -static -void printSym ( SegInfo* si, Int i ) -{ - VG_(printf)( "%5d: %8p .. %8p (%d) %s\n", - i, - si->symtab[i].addr, - si->symtab[i].addr + si->symtab[i].size - 1, si->symtab[i].size, - &si->strtab[si->symtab[i].nmoff] ); -} - - -#if 0 -/* Print the entire sym tab. 
*/ -static __attribute__ ((unused)) -void printSymtab ( void ) -{ - Int i; - VG_(printf)("\n------ BEGIN vg_symtab ------\n"); - for (i = 0; i < vg_symtab_used; i++) - printSym(i); - VG_(printf)("------ BEGIN vg_symtab ------\n"); -} -#endif - -#if 0 -/* Paranoid strcat. */ -static -void safeCopy ( UChar* dst, UInt maxlen, UChar* src ) -{ - UInt i = 0, j = 0; - while (True) { - if (i >= maxlen) return; - if (dst[i] == 0) break; - i++; - } - while (True) { - if (i >= maxlen) return; - dst[i] = src[j]; - if (src[j] == 0) return; - i++; j++; - } -} -#endif - - -/*------------------------------------------------------------*/ -/*--- Canonicalisers ---*/ -/*------------------------------------------------------------*/ - -/* Sort the symtab by starting address, and emit warnings if any - symbols have overlapping address ranges. We use that old chestnut, - shellsort. Mash the table around so as to establish the property - that addresses are in order and the ranges to not overlap. This - facilitates using binary search to map addresses to symbols when we - come to query the table. -*/ -static -void canonicaliseSymtab ( SegInfo* si ) -{ - /* Magic numbers due to Janet Incerpi and Robert Sedgewick. 
*/ - Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, - 4592, 13776, 33936, 86961, 198768, - 463792, 1391376 }; - Int lo = 0; - Int hi = si->symtab_used-1; - Int i, j, h, bigN, hp, n_merged, n_truncated; - RiSym v; - Addr s1, s2, e1, e2; - -# define SWAP(ty,aa,bb) \ - do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0) - - bigN = hi - lo + 1; if (bigN < 2) return; - hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; - vg_assert(0 <= hp && hp < 16); - - for (; hp >= 0; hp--) { - h = incs[hp]; - i = lo + h; - while (1) { - if (i > hi) break; - v = si->symtab[i]; - j = i; - while (si->symtab[j-h].addr > v.addr) { - si->symtab[j] = si->symtab[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - si->symtab[j] = v; - i++; - } - } - - cleanup_more: - - /* If two symbols have identical address ranges, favour the - one with the longer name. - */ - do { - n_merged = 0; - j = si->symtab_used; - si->symtab_used = 0; - for (i = 0; i < j; i++) { - if (i < j-1 - && si->symtab[i].addr == si->symtab[i+1].addr - && si->symtab[i].size == si->symtab[i+1].size) { - n_merged++; - /* merge the two into one */ - if (VG_(strlen)(&si->strtab[si->symtab[i].nmoff]) - > VG_(strlen)(&si->strtab[si->symtab[i+1].nmoff])) { - si->symtab[si->symtab_used++] = si->symtab[i]; - } else { - si->symtab[si->symtab_used++] = si->symtab[i+1]; - } - i++; - } else { - si->symtab[si->symtab_used++] = si->symtab[i]; - } - } - if (VG_(clo_trace_symtab)) - VG_(printf)( "%d merged\n", n_merged); - } - while (n_merged > 0); - - /* Detect and "fix" overlapping address ranges. */ - n_truncated = 0; - - for (i = 0; i < si->symtab_used-1; i++) { - - vg_assert(si->symtab[i].addr <= si->symtab[i+1].addr); - - /* Check for common (no overlap) case. */ - if (si->symtab[i].addr + si->symtab[i].size - <= si->symtab[i+1].addr) - continue; - - /* There's an overlap. Truncate one or the other. 
*/ - if (VG_(clo_trace_symtab)) { - VG_(printf)("overlapping address ranges in symbol table\n\t"); - printSym(si,i); - VG_(printf)("\t"); - printSym(si,i+1); - VG_(printf)("\n"); - } - - /* Truncate one or the other. */ - s1 = si->symtab[i].addr; - s2 = si->symtab[i+1].addr; - e1 = s1 + si->symtab[i].size - 1; - e2 = s2 + si->symtab[i+1].size - 1; - if (s1 < s2) { - e1 = s2-1; - } else { - vg_assert(s1 == s2); - if (e1 > e2) { - s1 = e2+1; SWAP(Addr,s1,s2); SWAP(Addr,e1,e2); - } else - if (e1 < e2) { - s2 = e1+1; - } else { - /* e1 == e2. Identical addr ranges. We'll eventually wind - up back at cleanup_more, which will take care of it. */ - } - } - si->symtab[i].addr = s1; - si->symtab[i+1].addr = s2; - si->symtab[i].size = e1 - s1 + 1; - si->symtab[i+1].size = e2 - s2 + 1; - vg_assert(s1 <= s2); - vg_assert(si->symtab[i].size > 0); - vg_assert(si->symtab[i+1].size > 0); - /* It may be that the i+1 entry now needs to be moved further - along to maintain the address order requirement. */ - j = i+1; - while (j < si->symtab_used-1 - && si->symtab[j].addr > si->symtab[j+1].addr) { - SWAP(RiSym,si->symtab[j],si->symtab[j+1]); - j++; - } - n_truncated++; - } - - if (n_truncated > 0) goto cleanup_more; - - /* Ensure relevant postconditions hold. */ - for (i = 0; i < si->symtab_used-1; i++) { - /* No zero-sized symbols. */ - vg_assert(si->symtab[i].size > 0); - /* In order. */ - vg_assert(si->symtab[i].addr < si->symtab[i+1].addr); - /* No overlaps. */ - vg_assert(si->symtab[i].addr + si->symtab[i].size - 1 - < si->symtab[i+1].addr); - } -# undef SWAP -} - - - -/* Sort the location table by starting address. Mash the table around - so as to establish the property that addresses are in order and the - ranges do not overlap. This facilitates using binary search to map - addresses to locations when we come to query the table. -*/ -static -void canonicaliseLoctab ( SegInfo* si ) -{ - /* Magic numbers due to Janet Incerpi and Robert Sedgewick. 
*/ - Int incs[16] = { 1, 3, 7, 21, 48, 112, 336, 861, 1968, - 4592, 13776, 33936, 86961, 198768, - 463792, 1391376 }; - Int lo = 0; - Int hi = si->loctab_used-1; - Int i, j, h, bigN, hp; - RiLoc v; - -# define SWAP(ty,aa,bb) \ - do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0); - - /* Sort by start address. */ - - bigN = hi - lo + 1; if (bigN < 2) return; - hp = 0; while (hp < 16 && incs[hp] < bigN) hp++; hp--; - vg_assert(0 <= hp && hp < 16); - - for (; hp >= 0; hp--) { - h = incs[hp]; - i = lo + h; - while (1) { - if (i > hi) break; - v = si->loctab[i]; - j = i; - while (si->loctab[j-h].addr > v.addr) { - si->loctab[j] = si->loctab[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - si->loctab[j] = v; - i++; - } - } - - /* If two adjacent entries overlap, truncate the first. */ - for (i = 0; i < si->loctab_used-1; i++) { - vg_assert(si->loctab[i].size < 10000); - if (si->loctab[i].addr + si->loctab[i].size > si->loctab[i+1].addr) { - /* Do this in signed int32 because the actual .size fields - are unsigned 16s. */ - Int new_size = si->loctab[i+1].addr - si->loctab[i].addr; - if (new_size < 0) { - si->loctab[i].size = 0; - } else - if (new_size >= 65536) { - si->loctab[i].size = 65535; - } else { - si->loctab[i].size = (UShort)new_size; - } - } - } - - /* Zap any zero-sized entries resulting from the truncation - process. */ - j = 0; - for (i = 0; i < si->loctab_used; i++) { - if (si->loctab[i].size > 0) { - si->loctab[j] = si->loctab[i]; - j++; - } - } - si->loctab_used = j; - - /* Ensure relevant postconditions hold. */ - for (i = 0; i < si->loctab_used-1; i++) { - /* - VG_(printf)("%d (%d) %d 0x%x\n", - i, si->loctab[i+1].confident, - si->loctab[i+1].size, si->loctab[i+1].addr ); - */ - /* No zero-sized symbols. */ - vg_assert(si->loctab[i].size > 0); - /* In order. */ - vg_assert(si->loctab[i].addr < si->loctab[i+1].addr); - /* No overlaps. 
*/ - vg_assert(si->loctab[i].addr + si->loctab[i].size - 1 - < si->loctab[i+1].addr); - } -# undef SWAP -} - - -/*------------------------------------------------------------*/ -/*--- Read STABS format debug info. ---*/ -/*------------------------------------------------------------*/ - -/* Stabs entry types, from: - * The "stabs" debug format - * Menapace, Kingdon and MacKenzie - * Cygnus Support - */ -typedef enum { N_GSYM = 32, /* Global symbol */ - N_FUN = 36, /* Function start or end */ - N_STSYM = 38, /* Data segment file-scope variable */ - N_LCSYM = 40, /* BSS segment file-scope variable */ - N_RSYM = 64, /* Register variable */ - N_SLINE = 68, /* Source line number */ - N_SO = 100, /* Source file path and name */ - N_LSYM = 128, /* Stack variable or type */ - N_SOL = 132, /* Include file name */ - N_LBRAC = 192, /* Start of lexical block */ - N_RBRAC = 224 /* End of lexical block */ - } stab_types; - - -/* Read stabs-format debug info. This is all rather horrible because - stabs is a underspecified, kludgy hack. -*/ -static -void read_debuginfo_stabs ( SegInfo* si, - UChar* stabC, Int stab_sz, - UChar* stabstr, Int stabstr_sz ) -{ - Int i; - Int curr_filenmoff; - Addr curr_fn_stabs_addr = (Addr)NULL; - Addr curr_fnbaseaddr = (Addr)NULL; - Char *curr_file_name, *curr_fn_name; - Int n_stab_entries; - Int prev_lineno = 0, lineno = 0; - Int lineno_overflows = 0; - Bool same_file = True; - struct nlist* stab = (struct nlist*)stabC; - - /* Ok. It all looks plausible. Go on and read debug data. - stab kinds: 100 N_SO a source file name - 68 N_SLINE a source line number - 36 N_FUN start of a function - - In this loop, we maintain a current file name, updated as - N_SO/N_SOLs appear, and a current function base address, - updated as N_FUNs appear. Based on that, address ranges for - N_SLINEs are calculated, and stuffed into the line info table. - - Finding the instruction address range covered by an N_SLINE is - complicated; see the N_SLINE case below. 
- */ - curr_filenmoff = addStr(si,"???"); - curr_file_name = curr_fn_name = (Char*)NULL; - - n_stab_entries = stab_sz/(int)sizeof(struct nlist); - - for (i = 0; i < n_stab_entries; i++) { -# if 0 - VG_(printf) ( " %2d ", i ); - VG_(printf) ( "type=0x%x othr=%d desc=%d value=0x%x strx=%d %s", - stab[i].n_type, stab[i].n_other, stab[i].n_desc, - (int)stab[i].n_value, - (int)stab[i].n_un.n_strx, - stabstr + stab[i].n_un.n_strx ); - VG_(printf)("\n"); -# endif - - Char *no_fn_name = "???"; - - switch (stab[i].n_type) { - UInt next_addr; - - /* Two complicated things here: - * - * 1. the n_desc field in 'struct n_list' in a.out.h is only - * 16-bits, which gives a maximum of 65535 lines. We handle - * files bigger than this by detecting heuristically - * overflows -- if the line count goes from 65000-odd to - * 0-odd within the same file, we assume it's an overflow. - * Once we switch files, we zero the overflow count. - * - * 2. To compute the instr address range covered by a single - * line, find the address of the next thing and compute the - * difference. The approach used depends on what kind of - * entry/entries follow... - */ - case N_SLINE: { - Int this_addr = (UInt)stab[i].n_value; - - /* Although stored as a short, neg values really are > - * 32768, hence the UShort cast. Then we use an Int to - * handle overflows. 
*/ - prev_lineno = lineno; - lineno = (Int)((UShort)stab[i].n_desc); - - if (prev_lineno > lineno + OVERFLOW_DIFFERENCE && same_file) { - VG_(message)(Vg_DebugMsg, - "Line number overflow detected (%d --> %d) in %s", - prev_lineno, lineno, curr_file_name); - lineno_overflows++; - } - same_file = True; - - LOOP: - if (i+1 >= n_stab_entries) { - /* If it's the last entry, just guess the range is - * four; can't do any better */ - next_addr = this_addr + 4; - } else { - switch (stab[i+1].n_type) { - /* Easy, common case: use address of next entry */ - case N_SLINE: case N_SO: - next_addr = (UInt)stab[i+1].n_value; - break; - - /* Boring one: skip, look for something more - useful. */ - case N_RSYM: case N_LSYM: case N_LBRAC: case N_RBRAC: - case N_STSYM: case N_LCSYM: case N_GSYM: - i++; - goto LOOP; - - /* If end-of-this-fun entry, use its address. - * If start-of-next-fun entry, find difference between start - * of current function and start of next function to work - * it out. - */ - case N_FUN: - if ('\0' == * (stabstr + stab[i+1].n_un.n_strx) ) { - next_addr = (UInt)stab[i+1].n_value; - } else { - next_addr = - (UInt)stab[i+1].n_value - curr_fn_stabs_addr; - } - break; - - /* N_SOL should be followed by an N_SLINE which can - be used */ - case N_SOL: - if (i+2 < n_stab_entries && N_SLINE == stab[i+2].n_type) { - next_addr = (UInt)stab[i+2].n_value; - break; - } else { - VG_(printf)("unhandled N_SOL stabs case: %d %d %d", - stab[i+1].n_type, i, n_stab_entries); - VG_(panic)("unhandled N_SOL stabs case"); - } - - default: - VG_(printf)("unhandled (other) stabs case: %d %d", - stab[i+1].n_type,i); - /* VG_(panic)("unhandled (other) stabs case"); */ - next_addr = this_addr + 4; - break; - } - } - - addLineInfo ( si, curr_filenmoff, curr_fnbaseaddr + this_addr, - curr_fnbaseaddr + next_addr, - lineno + lineno_overflows * LINENO_OVERFLOW, i); - break; - } - - case N_FUN: { - if ('\0' != (stabstr + stab[i].n_un.n_strx)[0] ) { - /* N_FUN with a name -- indicates the 
start of a fn. */ - curr_fn_stabs_addr = (Addr)stab[i].n_value; - curr_fnbaseaddr = si->offset + curr_fn_stabs_addr; - curr_fn_name = stabstr + stab[i].n_un.n_strx; - } else { - curr_fn_name = no_fn_name; - } - break; - } - - case N_SOL: - if (lineno_overflows != 0) { - VG_(message)(Vg_UserMsg, - "Warning: file %s is very big (> 65535 lines) " - "Line numbers and annotation for this file might " - "be wrong. Sorry", - curr_file_name); - } - /* fall through! */ - case N_SO: - lineno_overflows = 0; - - /* seems to give lots of locations in header files */ - /* case 130: */ /* BINCL */ - { - UChar* nm = stabstr + stab[i].n_un.n_strx; - UInt len = VG_(strlen)(nm); - - if (len > 0 && nm[len-1] != '/') { - curr_filenmoff = addStr ( si, nm ); - curr_file_name = stabstr + stab[i].n_un.n_strx; - } - else - if (len == 0) - curr_filenmoff = addStr ( si, "?1\0" ); - - break; - } - -# if 0 - case 162: /* EINCL */ - curr_filenmoff = addStr ( si, "?2\0" ); - break; -# endif - - default: - break; - } - } /* for (i = 0; i < stab_sz/(int)sizeof(struct nlist); i++) */ -} - - -/*------------------------------------------------------------*/ -/*--- Read DWARF2 format debug info. ---*/ -/*------------------------------------------------------------*/ - -/* Structure found in the .debug_line section. */ -typedef struct -{ - UChar li_length [4]; - UChar li_version [2]; - UChar li_prologue_length [4]; - UChar li_min_insn_length [1]; - UChar li_default_is_stmt [1]; - UChar li_line_base [1]; - UChar li_line_range [1]; - UChar li_opcode_base [1]; -} -DWARF2_External_LineInfo; - -typedef struct -{ - UInt li_length; - UShort li_version; - UInt li_prologue_length; - UChar li_min_insn_length; - UChar li_default_is_stmt; - Int li_line_base; - UChar li_line_range; - UChar li_opcode_base; -} -DWARF2_Internal_LineInfo; - -/* Line number opcodes. 
*/ -enum dwarf_line_number_ops - { - DW_LNS_extended_op = 0, - DW_LNS_copy = 1, - DW_LNS_advance_pc = 2, - DW_LNS_advance_line = 3, - DW_LNS_set_file = 4, - DW_LNS_set_column = 5, - DW_LNS_negate_stmt = 6, - DW_LNS_set_basic_block = 7, - DW_LNS_const_add_pc = 8, - DW_LNS_fixed_advance_pc = 9, - /* DWARF 3. */ - DW_LNS_set_prologue_end = 10, - DW_LNS_set_epilogue_begin = 11, - DW_LNS_set_isa = 12 - }; - -/* Line number extended opcodes. */ -enum dwarf_line_number_x_ops - { - DW_LNE_end_sequence = 1, - DW_LNE_set_address = 2, - DW_LNE_define_file = 3 - }; - -typedef struct State_Machine_Registers -{ - Addr address; - UInt file; - UInt line; - UInt column; - Int is_stmt; - Int basic_block; - Int end_sequence; - /* This variable hold the number of the last entry seen - in the File Table. */ - UInt last_file_entry; -} SMR; - - -static -UInt read_leb128 ( UChar* data, Int* length_return, Int sign ) -{ - UInt result = 0; - UInt num_read = 0; - Int shift = 0; - UChar byte; - - do - { - byte = * data ++; - num_read ++; - - result |= (byte & 0x7f) << shift; - - shift += 7; - - } - while (byte & 0x80); - - if (length_return != NULL) - * length_return = num_read; - - if (sign && (shift < 32) && (byte & 0x40)) - result |= -1 << shift; - - return result; -} - - -static SMR state_machine_regs; - -static -void reset_state_machine ( Int is_stmt ) -{ - if (0) VG_(printf)("smr.a := %p (reset)\n", 0 ); - state_machine_regs.address = 0; - state_machine_regs.file = 1; - state_machine_regs.line = 1; - state_machine_regs.column = 0; - state_machine_regs.is_stmt = is_stmt; - state_machine_regs.basic_block = 0; - state_machine_regs.end_sequence = 0; - state_machine_regs.last_file_entry = 0; -} - -/* Handled an extend line op. Returns true if this is the end - of sequence. 
*/ -static -int process_extended_line_op( SegInfo *si, UInt** fnames, - UChar* data, Int is_stmt, Int pointer_size) -{ - UChar op_code; - Int bytes_read; - UInt len; - UChar * name; - Addr adr; - - len = read_leb128 (data, & bytes_read, 0); - data += bytes_read; - - if (len == 0) - { - VG_(message)(Vg_UserMsg, - "badly formed extended line op encountered!\n"); - return bytes_read; - } - - len += bytes_read; - op_code = * data ++; - - - switch (op_code) - { - case DW_LNE_end_sequence: - if (0) VG_(printf)("1001: si->o %p, smr.a %p\n", - si->offset, state_machine_regs.address ); - state_machine_regs.end_sequence = 1; /* JRS: added for compliance - with spec; is pointless due to reset_state_machine below - */ - addLineInfo (si, (*fnames)[state_machine_regs.file], - si->offset + (state_machine_regs.address - 1), - si->offset + (state_machine_regs.address), - 0, 0); - reset_state_machine (is_stmt); - break; - - case DW_LNE_set_address: - /* XXX: Pointer size could be 8 */ - vg_assert(pointer_size == 4); - adr = *((Addr *)data); - if (0) VG_(printf)("smr.a := %p\n", adr ); - state_machine_regs.address = adr; - break; - - case DW_LNE_define_file: - ++ state_machine_regs.last_file_entry; - name = data; - if (*fnames == NULL) - *fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2); - else - *fnames = VG_(realloc)( - VG_AR_SYMTAB, *fnames, - sizeof(UInt) - * (state_machine_regs.last_file_entry + 1)); - (*fnames)[state_machine_regs.last_file_entry] = addStr (si,name); - data += VG_(strlen) ((char *) data) + 1; - read_leb128 (data, & bytes_read, 0); - data += bytes_read; - read_leb128 (data, & bytes_read, 0); - data += bytes_read; - read_leb128 (data, & bytes_read, 0); - break; - - default: - break; - } - - return len; -} - - -static -void read_debuginfo_dwarf2 ( SegInfo* si, UChar* dwarf2, Int dwarf2_sz ) -{ - DWARF2_External_LineInfo * external; - DWARF2_Internal_LineInfo info; - UChar * standard_opcodes; - UChar * data = dwarf2; - UChar * end = dwarf2 + dwarf2_sz; - UChar 
* end_of_sequence; - UInt * fnames = NULL; - - /* Fails due to gcc padding ... - vg_assert(sizeof(DWARF2_External_LineInfo) - == sizeof(DWARF2_Internal_LineInfo)); - */ - - while (data < end) - { - external = (DWARF2_External_LineInfo *) data; - - /* Check the length of the block. */ - info.li_length = * ((UInt *)(external->li_length)); - - if (info.li_length == 0xffffffff) - { - vg_symerr("64-bit DWARF line info is not supported yet."); - break; - } - - if (info.li_length + sizeof (external->li_length) > dwarf2_sz) - { - vg_symerr("DWARF line info appears to be corrupt " - "- the section is too small"); - return; - } - - /* Check its version number. */ - info.li_version = * ((UShort *) (external->li_version)); - if (info.li_version != 2) - { - vg_symerr("Only DWARF version 2 line info " - "is currently supported."); - return; - } - - info.li_prologue_length = * ((UInt *) (external->li_prologue_length)); - info.li_min_insn_length = * ((UChar *)(external->li_min_insn_length)); - info.li_default_is_stmt = * ((UChar *)(external->li_default_is_stmt)); - - /* JRS: changed (UInt*) to (UChar*) */ - info.li_line_base = * ((UChar *)(external->li_line_base)); - - info.li_line_range = * ((UChar *)(external->li_line_range)); - info.li_opcode_base = * ((UChar *)(external->li_opcode_base)); - - /* Sign extend the line base field. */ - info.li_line_base <<= 24; - info.li_line_base >>= 24; - - end_of_sequence = data + info.li_length - + sizeof (external->li_length); - - reset_state_machine (info.li_default_is_stmt); - - /* Read the contents of the Opcodes table. */ - standard_opcodes = data + sizeof (* external); - - /* Read the contents of the Directory table. */ - data = standard_opcodes + info.li_opcode_base - 1; - - if (* data == 0) - { - } - else - { - /* We ignore the directory table, since gcc gives the entire - path as part of the filename */ - while (* data != 0) - { - data += VG_(strlen) ((char *) data) + 1; - } - } - - /* Skip the NUL at the end of the table. 
*/ - if (*data != 0) { - vg_symerr("can't find NUL at end of DWARF2 directory table"); - return; - } - data ++; - - /* Read the contents of the File Name table. */ - if (* data == 0) - { - } - else - { - while (* data != 0) - { - UChar * name; - Int bytes_read; - - ++ state_machine_regs.last_file_entry; - name = data; - /* Since we don't have realloc (0, ....) == malloc (...) - semantics, we need to malloc the first time. */ - - if (fnames == NULL) - fnames = VG_(malloc)(VG_AR_SYMTAB, sizeof (UInt) * 2); - else - fnames = VG_(realloc)(VG_AR_SYMTAB, fnames, - sizeof(UInt) - * (state_machine_regs.last_file_entry + 1)); - data += VG_(strlen) ((Char *) data) + 1; - fnames[state_machine_regs.last_file_entry] = addStr (si,name); - - read_leb128 (data, & bytes_read, 0); - data += bytes_read; - read_leb128 (data, & bytes_read, 0); - data += bytes_read; - read_leb128 (data, & bytes_read, 0); - data += bytes_read; - } - } - - /* Skip the NUL at the end of the table. */ - if (*data != 0) { - vg_symerr("can't find NUL at end of DWARF2 file name table"); - return; - } - data ++; - - /* Now display the statements. 
*/ - - while (data < end_of_sequence) - { - UChar op_code; - Int adv; - Int bytes_read; - - op_code = * data ++; - - if (op_code >= info.li_opcode_base) - { - Int advAddr; - op_code -= info.li_opcode_base; - adv = (op_code / info.li_line_range) - * info.li_min_insn_length; - advAddr = adv; - state_machine_regs.address += adv; - if (0) VG_(printf)("smr.a += %p\n", adv ); - adv = (op_code % info.li_line_range) + info.li_line_base; - if (0) VG_(printf)("1002: si->o %p, smr.a %p\n", - si->offset, state_machine_regs.address ); - addLineInfo (si, fnames[state_machine_regs.file], - si->offset + (state_machine_regs.address - - advAddr), - si->offset + (state_machine_regs.address), - state_machine_regs.line, 0); - state_machine_regs.line += adv; - } - else switch (op_code) - { - case DW_LNS_extended_op: - data += process_extended_line_op ( - si, &fnames, data, - info.li_default_is_stmt, sizeof (Addr)); - break; - - case DW_LNS_copy: - if (0) VG_(printf)("1002: si->o %p, smr.a %p\n", - si->offset, state_machine_regs.address ); - addLineInfo (si, fnames[state_machine_regs.file], - si->offset + state_machine_regs.address, - si->offset + (state_machine_regs.address + 1), - state_machine_regs.line , 0); - state_machine_regs.basic_block = 0; /* JRS added */ - break; - - case DW_LNS_advance_pc: - adv = info.li_min_insn_length - * read_leb128 (data, & bytes_read, 0); - data += bytes_read; - state_machine_regs.address += adv; - if (0) VG_(printf)("smr.a += %p\n", adv ); - break; - - case DW_LNS_advance_line: - adv = read_leb128 (data, & bytes_read, 1); - data += bytes_read; - state_machine_regs.line += adv; - break; - - case DW_LNS_set_file: - adv = read_leb128 (data, & bytes_read, 0); - data += bytes_read; - state_machine_regs.file = adv; - break; - - case DW_LNS_set_column: - adv = read_leb128 (data, & bytes_read, 0); - data += bytes_read; - state_machine_regs.column = adv; - break; - - case DW_LNS_negate_stmt: - adv = state_machine_regs.is_stmt; - adv = ! 
adv; - state_machine_regs.is_stmt = adv; - break; - - case DW_LNS_set_basic_block: - state_machine_regs.basic_block = 1; - break; - - case DW_LNS_const_add_pc: - adv = (((255 - info.li_opcode_base) / info.li_line_range) - * info.li_min_insn_length); - state_machine_regs.address += adv; - if (0) VG_(printf)("smr.a += %p\n", adv ); - break; - - case DW_LNS_fixed_advance_pc: - /* XXX: Need something to get 2 bytes */ - adv = *((UShort *)data); - data += 2; - state_machine_regs.address += adv; - if (0) VG_(printf)("smr.a += %p\n", adv ); - break; - - case DW_LNS_set_prologue_end: - break; - - case DW_LNS_set_epilogue_begin: - break; - - case DW_LNS_set_isa: - adv = read_leb128 (data, & bytes_read, 0); - data += bytes_read; - break; - - default: - { - int j; - for (j = standard_opcodes[op_code - 1]; j > 0 ; --j) - { - read_leb128 (data, &bytes_read, 0); - data += bytes_read; - } - } - break; - } - } - VG_(free)(VG_AR_SYMTAB, fnames); - fnames = NULL; - } -} - - -/*------------------------------------------------------------*/ -/*--- Read info from a .so/exe file. ---*/ -/*------------------------------------------------------------*/ - -/* Read the symbols from the object/exe specified by the SegInfo into - the tables within the supplied SegInfo. 
*/ -static -void vg_read_lib_symbols ( SegInfo* si ) -{ - Elf32_Ehdr* ehdr; /* The ELF header */ - Elf32_Shdr* shdr; /* The section table */ - UChar* sh_strtab; /* The section table's string table */ - UChar* stab; /* The .stab table */ - UChar* stabstr; /* The .stab string table */ - UChar* dwarf2; /* The DWARF2 location info table */ - Int stab_sz; /* Size in bytes of the .stab table */ - Int stabstr_sz; /* Size in bytes of the .stab string table */ - Int dwarf2_sz; /* Size in bytes of the DWARF2 srcloc table*/ - Int fd; - Int i; - Bool ok; - Addr oimage; - Int n_oimage; - struct vki_stat stat_buf; - - oimage = (Addr)NULL; - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg, "Reading syms from %s", si->filename ); - - /* mmap the object image aboard, so that we can read symbols and - line number info out of it. It will be munmapped immediately - thereafter; it is only aboard transiently. */ - - i = VG_(stat)(si->filename, &stat_buf); - if (i != 0) { - vg_symerr("Can't stat .so/.exe (to determine its size)?!"); - return; - } - n_oimage = stat_buf.st_size; - - fd = VG_(open_read)(si->filename); - if (fd == -1) { - vg_symerr("Can't open .so/.exe to read symbols?!"); - return; - } - - oimage = (Addr)VG_(mmap)( NULL, n_oimage, - VKI_PROT_READ, VKI_MAP_PRIVATE, fd, 0 ); - if (oimage == ((Addr)(-1))) { - VG_(message)(Vg_UserMsg, - "mmap failed on %s", si->filename ); - VG_(close)(fd); - return; - } - - VG_(close)(fd); - - /* Ok, the object image is safely in oimage[0 .. n_oimage-1]. - Now verify that it is a valid ELF .so or executable image. 
- */ - ok = (n_oimage >= sizeof(Elf32_Ehdr)); - ehdr = (Elf32_Ehdr*)oimage; - - if (ok) { - ok &= (ehdr->e_ident[EI_MAG0] == 0x7F - && ehdr->e_ident[EI_MAG1] == 'E' - && ehdr->e_ident[EI_MAG2] == 'L' - && ehdr->e_ident[EI_MAG3] == 'F'); - ok &= (ehdr->e_ident[EI_CLASS] == ELFCLASS32 - && ehdr->e_ident[EI_DATA] == ELFDATA2LSB - && ehdr->e_ident[EI_VERSION] == EV_CURRENT); - ok &= (ehdr->e_type == ET_EXEC || ehdr->e_type == ET_DYN); - ok &= (ehdr->e_machine == EM_386); - ok &= (ehdr->e_version == EV_CURRENT); - ok &= (ehdr->e_shstrndx != SHN_UNDEF); - ok &= (ehdr->e_shoff != 0 && ehdr->e_shnum != 0); - } - - if (!ok) { - vg_symerr("Invalid ELF header, or missing stringtab/sectiontab."); - VG_(munmap) ( (void*)oimage, n_oimage ); - return; - } - - if (VG_(clo_trace_symtab)) - VG_(printf)( - "shoff = %d, shnum = %d, size = %d, n_vg_oimage = %d\n", - ehdr->e_shoff, ehdr->e_shnum, sizeof(Elf32_Shdr), n_oimage ); - - if (ehdr->e_shoff + ehdr->e_shnum*sizeof(Elf32_Shdr) > n_oimage) { - vg_symerr("ELF section header is beyond image end?!"); - VG_(munmap) ( (void*)oimage, n_oimage ); - return; - } - - shdr = (Elf32_Shdr*)(oimage + ehdr->e_shoff); - sh_strtab = (UChar*)(oimage + shdr[ehdr->e_shstrndx].sh_offset); - - /* try and read the object's symbol table */ - { - UChar* o_strtab = NULL; - Elf32_Sym* o_symtab = NULL; - UInt o_strtab_sz = 0; - UInt o_symtab_sz = 0; - - UChar* o_got = NULL; - UChar* o_plt = NULL; - UInt o_got_sz = 0; - UInt o_plt_sz = 0; - - Bool snaffle_it; - Addr sym_addr; - - /* find the .stabstr and .stab sections */ - for (i = 0; i < ehdr->e_shnum; i++) { - if (0 == VG_(strcmp)(".symtab",sh_strtab + shdr[i].sh_name)) { - o_symtab = (Elf32_Sym*)(oimage + shdr[i].sh_offset); - o_symtab_sz = shdr[i].sh_size; - vg_assert((o_symtab_sz % sizeof(Elf32_Sym)) == 0); - /* check image overrun here */ - } - if (0 == VG_(strcmp)(".strtab",sh_strtab + shdr[i].sh_name)) { - o_strtab = (UChar*)(oimage + shdr[i].sh_offset); - o_strtab_sz = shdr[i].sh_size; - /* check 
image overrun here */ - } - - /* find out where the .got and .plt sections will be in the - executable image, not in the object image transiently loaded. - */ - if (0 == VG_(strcmp)(".got",sh_strtab + shdr[i].sh_name)) { - o_got = (UChar*)(si->offset - + shdr[i].sh_offset); - o_got_sz = shdr[i].sh_size; - /* check image overrun here */ - } - if (0 == VG_(strcmp)(".plt",sh_strtab + shdr[i].sh_name)) { - o_plt = (UChar*)(si->offset - + shdr[i].sh_offset); - o_plt_sz = shdr[i].sh_size; - /* check image overrun here */ - } - - } - - if (VG_(clo_trace_symtab)) { - if (o_plt) VG_(printf)( "PLT: %p .. %p\n", - o_plt, o_plt + o_plt_sz - 1 ); - if (o_got) VG_(printf)( "GOT: %p .. %p\n", - o_got, o_got + o_got_sz - 1 ); - } - - if (o_strtab == NULL || o_symtab == NULL) { - vg_symerr(" object doesn't have a symbol table"); - } else { - /* Perhaps should start at i = 1; ELF docs suggest that entry - 0 always denotes `unknown symbol'. */ - for (i = 1; i < o_symtab_sz/sizeof(Elf32_Sym); i++){ -# if 0 - VG_(printf)("raw symbol: "); - switch (ELF32_ST_BIND(o_symtab[i].st_info)) { - case STB_LOCAL: VG_(printf)("LOC "); break; - case STB_GLOBAL: VG_(printf)("GLO "); break; - case STB_WEAK: VG_(printf)("WEA "); break; - case STB_LOPROC: VG_(printf)("lop "); break; - case STB_HIPROC: VG_(printf)("hip "); break; - default: VG_(printf)("??? "); break; - } - switch (ELF32_ST_TYPE(o_symtab[i].st_info)) { - case STT_NOTYPE: VG_(printf)("NOT "); break; - case STT_OBJECT: VG_(printf)("OBJ "); break; - case STT_FUNC: VG_(printf)("FUN "); break; - case STT_SECTION: VG_(printf)("SEC "); break; - case STT_FILE: VG_(printf)("FIL "); break; - case STT_LOPROC: VG_(printf)("lop "); break; - case STT_HIPROC: VG_(printf)("hip "); break; - default: VG_(printf)("??? "); break; - } - VG_(printf)( - ": value %p, size %d, name %s\n", - si->offset+(UChar*)o_symtab[i].st_value, - o_symtab[i].st_size, - o_symtab[i].st_name - ? 
((Char*)o_strtab+o_symtab[i].st_name) - : (Char*)"NONAME"); -# endif - - /* Figure out if we're interested in the symbol. - Firstly, is it of the right flavour? - */ - snaffle_it - = ( (ELF32_ST_BIND(o_symtab[i].st_info) == STB_GLOBAL || - ELF32_ST_BIND(o_symtab[i].st_info) == STB_LOCAL /* || - ELF32_ST_BIND(o_symtab[i].st_info) == STB_WEAK */) - && - (ELF32_ST_TYPE(o_symtab[i].st_info) == STT_FUNC /*|| - ELF32_ST_TYPE(o_symtab[i].st_info) == STT_OBJECT*/) - ); - - /* Secondly, if it's apparently in a GOT or PLT, it's really - a reference to a symbol defined elsewhere, so ignore it. - */ - sym_addr = si->offset - + (UInt)o_symtab[i].st_value; - if (o_got != NULL - && sym_addr >= (Addr)o_got - && sym_addr < (Addr)(o_got+o_got_sz)) { - snaffle_it = False; - if (VG_(clo_trace_symtab)) { - VG_(printf)( "in GOT: %s\n", - o_strtab+o_symtab[i].st_name); - } - } - if (o_plt != NULL - && sym_addr >= (Addr)o_plt - && sym_addr < (Addr)(o_plt+o_plt_sz)) { - snaffle_it = False; - if (VG_(clo_trace_symtab)) { - VG_(printf)( "in PLT: %s\n", - o_strtab+o_symtab[i].st_name); - } - } - - /* Don't bother if nameless, or zero-sized. */ - if (snaffle_it - && (o_symtab[i].st_name == (Elf32_Word)NULL - || /* VG_(strlen)(o_strtab+o_symtab[i].st_name) == 0 */ - /* equivalent but cheaper ... */ - * ((UChar*)(o_strtab+o_symtab[i].st_name)) == 0 - || o_symtab[i].st_size == 0)) { - snaffle_it = False; - if (VG_(clo_trace_symtab)) { - VG_(printf)( "size=0: %s\n", - o_strtab+o_symtab[i].st_name); - } - } - -# if 0 - /* Avoid _dl_ junk. (Why?) */ - /* 01-02-24: disabled until I find out if it really helps. 
*/ - if (snaffle_it - && (VG_(strncmp)("_dl_", o_strtab+o_symtab[i].st_name, 4) == 0 - || VG_(strncmp)("_r_debug", - o_strtab+o_symtab[i].st_name, 8) == 0)) { - snaffle_it = False; - if (VG_(clo_trace_symtab)) { - VG_(printf)( "_dl_ junk: %s\n", - o_strtab+o_symtab[i].st_name); - } - } -# endif - - /* This seems to significantly reduce the number of junk - symbols, and particularly reduces the number of - overlapping address ranges. Don't ask me why ... */ - if (snaffle_it && (Int)o_symtab[i].st_value == 0) { - snaffle_it = False; - if (VG_(clo_trace_symtab)) { - VG_(printf)( "valu=0: %s\n", - o_strtab+o_symtab[i].st_name); - } - } - - /* If no part of the symbol falls within the mapped range, - ignore it. */ - if (sym_addr+o_symtab[i].st_size <= si->start - || sym_addr >= si->start+si->size) { - snaffle_it = False; - } - - if (snaffle_it) { - /* it's an interesting symbol; record ("snaffle") it. */ - RiSym sym; - Char* t0 = o_symtab[i].st_name - ? (Char*)(o_strtab+o_symtab[i].st_name) - : (Char*)"NONAME"; - Int nmoff = addStr ( si, t0 ); - vg_assert(nmoff >= 0 - /* && 0==VG_(strcmp)(t0,&vg_strtab[nmoff]) */ ); - vg_assert( (Int)o_symtab[i].st_value >= 0); - /* VG_(printf)("%p + %d: %s\n", si->addr, - (Int)o_symtab[i].st_value, t0 ); */ - sym.addr = sym_addr; - sym.size = o_symtab[i].st_size; - sym.nmoff = nmoff; - addSym ( si, &sym ); - } - } - } - } - - /* Reading of the stabs and/or dwarf2 debug format information, if - any. 
*/ - stabstr = NULL; - stab = NULL; - dwarf2 = NULL; - stabstr_sz = 0; - stab_sz = 0; - dwarf2_sz = 0; - - /* find the .stabstr / .stab / .debug_line sections */ - for (i = 0; i < ehdr->e_shnum; i++) { - if (0 == VG_(strcmp)(".stab",sh_strtab + shdr[i].sh_name)) { - stab = (UChar*)(oimage + shdr[i].sh_offset); - stab_sz = shdr[i].sh_size; - } - if (0 == VG_(strcmp)(".stabstr",sh_strtab + shdr[i].sh_name)) { - stabstr = (UChar*)(oimage + shdr[i].sh_offset); - stabstr_sz = shdr[i].sh_size; - } - if (0 == VG_(strcmp)(".debug_line",sh_strtab + shdr[i].sh_name)) { - dwarf2 = (UChar *)(oimage + shdr[i].sh_offset); - dwarf2_sz = shdr[i].sh_size; - } - } - - if ((stab == NULL || stabstr == NULL) && dwarf2 == NULL) { - vg_symerr(" object doesn't have any debug info"); - VG_(munmap) ( (void*)oimage, n_oimage ); - return; - } - - if ( stab_sz + (UChar*)stab > n_oimage + (UChar*)oimage - || stabstr_sz + (UChar*)stabstr - > n_oimage + (UChar*)oimage ) { - vg_symerr(" ELF (stabs) debug data is beyond image end?!"); - VG_(munmap) ( (void*)oimage, n_oimage ); - return; - } - - if ( dwarf2_sz + (UChar*)dwarf2 > n_oimage + (UChar*)oimage ) { - vg_symerr(" ELF (dwarf2) debug data is beyond image end?!"); - VG_(munmap) ( (void*)oimage, n_oimage ); - return; - } - - /* Looks plausible. Go on and read debug data. */ - if (stab != NULL && stabstr != NULL) { - read_debuginfo_stabs ( si, stab, stab_sz, stabstr, stabstr_sz ); - } - - if (dwarf2 != NULL) { - read_debuginfo_dwarf2 ( si, dwarf2, dwarf2_sz ); - } - - /* Last, but not least, heave the oimage back overboard. */ - VG_(munmap) ( (void*)oimage, n_oimage ); -} - - -/*------------------------------------------------------------*/ -/*--- Main entry point for symbols table reading. ---*/ -/*------------------------------------------------------------*/ - -/* The root structure for the entire symbol table system. It is a - linked list of SegInfos. 
Note that this entire mechanism assumes - that what we read from /proc/self/maps doesn't contain overlapping - address ranges, and as a result the SegInfos in this list describe - disjoint address ranges. -*/ -static SegInfo* segInfo = NULL; - - -static -void read_symtab_callback ( - Addr start, UInt size, - Char rr, Char ww, Char xx, - UInt foffset, UChar* filename ) -{ - SegInfo* si; - - /* Stay sane ... */ - if (size == 0) - return; - - /* We're only interested in collecting symbols in executable - segments which are associated with a real file. Hence: */ - if (filename == NULL || xx != 'x') - return; - if (0 == VG_(strcmp)(filename, "/dev/zero")) - return; - - /* Perhaps we already have this one? If so, skip. */ - for (si = segInfo; si != NULL; si = si->next) { - /* - if (0==VG_(strcmp)(si->filename, filename)) - VG_(printf)("same fnames: %c%c%c (%p, %d) (%p, %d) %s\n", - rr,ww,xx,si->start,si->size,start,size,filename); - */ - /* For some reason the observed size of a mapping can change, so - we don't use that to determine uniqueness. */ - if (si->start == start - /* && si->size == size */ - && 0==VG_(strcmp)(si->filename, filename)) { - return; - } - } - - /* Get the record initialised right. */ - si = VG_(malloc)(VG_AR_SYMTAB, sizeof(SegInfo)); - si->next = segInfo; - segInfo = si; - - si->start = start; - si->size = size; - si->foffset = foffset; - si->filename = VG_(malloc)(VG_AR_SYMTAB, 1 + VG_(strlen)(filename)); - VG_(strcpy)(si->filename, filename); - - si->symtab = NULL; - si->symtab_size = si->symtab_used = 0; - si->loctab = NULL; - si->loctab_size = si->loctab_used = 0; - si->strtab = NULL; - si->strtab_size = si->strtab_used = 0; - - /* Kludge ... */ - si->offset - = si->start==VG_ASSUMED_EXE_BASE ? 0 : si->start; - - /* And actually fill it up. */ - if (VG_(clo_instrument) || VG_(clo_cachesim)) { - vg_read_lib_symbols ( si ); - canonicaliseSymtab ( si ); - canonicaliseLoctab ( si ); - } -} - - -/* This one really is the Head Honcho. 
Update the symbol tables to - reflect the current state of /proc/self/maps. Rather than re-read - everything, just read the entries which are not already in segInfo. - So we can call here repeatedly, after every mmap of a non-anonymous - segment with execute permissions, for example, to pick up new - libraries as they are dlopen'd. Conversely, when the client does - munmap(), vg_symtab_notify_munmap() throws away any symbol tables - which happen to correspond to the munmap()d area. */ -void VG_(read_symbols) ( void ) -{ - VG_(read_procselfmaps) ( read_symtab_callback ); - - /* Do a sanity check on the symbol tables: ensure that the address - space pieces they cover do not overlap (otherwise we are severely - hosed). This is a quadratic algorithm, but there shouldn't be - many of them. - */ - { SegInfo *si, *si2; - for (si = segInfo; si != NULL; si = si->next) { - /* Check no overlap between *si and those in the rest of the - list. */ - for (si2 = si->next; si2 != NULL; si2 = si2->next) { - Addr lo = si->start; - Addr hi = si->start + si->size - 1; - Addr lo2 = si2->start; - Addr hi2 = si2->start + si2->size - 1; - Bool overlap; - vg_assert(lo < hi); - vg_assert(lo2 < hi2); - /* the main assertion */ - overlap = (lo <= lo2 && lo2 <= hi) - || (lo <= hi2 && hi2 <= hi); - if (overlap) { - VG_(printf)("\n\nOVERLAPPING SEGMENTS\n" ); - ppSegInfo ( si ); - ppSegInfo ( si2 ); - VG_(printf)("\n\n"); - vg_assert(! overlap); - } - } - } - } -} - - -/* When an munmap() call happens, check to see whether it corresponds - to a segment for a .so, and if so discard the relevant SegInfo. - This might not be a very clever idea from the point of view of - accuracy of error messages, but we need to do it in order to - maintain the no-overlapping invariant. - - 16 May 02: Returns a Bool indicating whether or not the discarded - range falls inside a known executable segment. See comment at top - of file for why. 
-*/ -Bool VG_(symtab_notify_munmap) ( Addr start, UInt length ) -{ - SegInfo *prev, *curr; - - prev = NULL; - curr = segInfo; - while (True) { - if (curr == NULL) break; - if (start == curr->start) break; - prev = curr; - curr = curr->next; - } - if (curr == NULL) - return False; - - VG_(message)(Vg_UserMsg, - "discard syms in %s due to munmap()", - curr->filename ? curr->filename : (UChar*)"???"); - - vg_assert(prev == NULL || prev->next == curr); - - if (prev == NULL) { - segInfo = curr->next; - } else { - prev->next = curr->next; - } - - freeSegInfo(curr); - return True; -} - - -/*------------------------------------------------------------*/ -/*--- Use of symbol table & location info to create ---*/ -/*--- plausible-looking stack dumps. ---*/ -/*------------------------------------------------------------*/ - -/* Find a symbol-table index containing the specified pointer, or -1 - if not found. Binary search. */ - -static Int search_one_symtab ( SegInfo* si, Addr ptr ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, - lo = 0, - hi = si->symtab_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->symtab[mid].addr; - a_mid_hi = ((Addr)si->symtab[mid].addr) + si->symtab[mid].size - 1; - - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* Search all symtabs that we know about to locate ptr. If found, set - *psi to the relevant SegInfo, and *symno to the symtab entry number - within that. If not found, *psi is set to NULL. 
*/ - -static void search_all_symtabs ( Addr ptr, SegInfo** psi, Int* symno ) -{ - Int sno; - SegInfo* si; - for (si = segInfo; si != NULL; si = si->next) { - if (si->start <= ptr && ptr < si->start+si->size) { - sno = search_one_symtab ( si, ptr ); - if (sno == -1) goto not_found; - *symno = sno; - *psi = si; - return; - } - } - not_found: - *psi = NULL; -} - - -/* Find a location-table index containing the specified pointer, or -1 - if not found. Binary search. */ - -static Int search_one_loctab ( SegInfo* si, Addr ptr ) -{ - Addr a_mid_lo, a_mid_hi; - Int mid, - lo = 0, - hi = si->loctab_used-1; - while (True) { - /* current unsearched space is from lo to hi, inclusive. */ - if (lo > hi) return -1; /* not found */ - mid = (lo + hi) / 2; - a_mid_lo = si->loctab[mid].addr; - a_mid_hi = ((Addr)si->loctab[mid].addr) + si->loctab[mid].size - 1; - - if (ptr < a_mid_lo) { hi = mid-1; continue; } - if (ptr > a_mid_hi) { lo = mid+1; continue; } - vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi); - return mid; - } -} - - -/* Search all loctabs that we know about to locate ptr. If found, set - *psi to the relevant SegInfo, and *locno to the loctab entry number - within that. If not found, *psi is set to NULL. -*/ -static void search_all_loctabs ( Addr ptr, SegInfo** psi, Int* locno ) -{ - Int lno; - SegInfo* si; - for (si = segInfo; si != NULL; si = si->next) { - if (si->start <= ptr && ptr < si->start+si->size) { - lno = search_one_loctab ( si, ptr ); - if (lno == -1) goto not_found; - *locno = lno; - *psi = si; - return; - } - } - not_found: - *psi = NULL; -} - - -/* The whole point of this whole big deal: map a code address to a - plausible symbol name. Returns False if no idea; otherwise True. - Caller supplies buf and nbuf. If no_demangle is True, don't do - demangling, regardless of vg_clo_demangle -- probably because the - call has come from vg_what_fn_or_object_is_this. 
*/ -Bool VG_(what_fn_is_this) ( Bool no_demangle, Addr a, - Char* buf, Int nbuf ) -{ - SegInfo* si; - Int sno; - search_all_symtabs ( a, &si, &sno ); - if (si == NULL) - return False; - if (no_demangle) { - VG_(strncpy_safely) - ( buf, & si->strtab[si->symtab[sno].nmoff], nbuf ); - } else { - VG_(demangle) ( & si->strtab[si->symtab[sno].nmoff], buf, nbuf ); - } - return True; -} - - -/* Map a code address to the name of a shared object file. Returns - False if no idea; otherwise False. Caller supplies buf and - nbuf. */ -static -Bool vg_what_object_is_this ( Addr a, Char* buf, Int nbuf ) -{ - SegInfo* si; - for (si = segInfo; si != NULL; si = si->next) { - if (si->start <= a && a < si->start+si->size) { - VG_(strncpy_safely)(buf, si->filename, nbuf); - return True; - } - } - return False; -} - -/* Return the name of an erring fn in a way which is useful - for comparing against the contents of a suppressions file. - Always writes something to buf. Also, doesn't demangle the - name, because we want to refer to mangled names in the - suppressions file. -*/ -void VG_(what_obj_and_fun_is_this) ( Addr a, - Char* obj_buf, Int n_obj_buf, - Char* fun_buf, Int n_fun_buf ) -{ - (void)vg_what_object_is_this ( a, obj_buf, n_obj_buf ); - (void)VG_(what_fn_is_this) ( True, a, fun_buf, n_fun_buf ); -} - - -/* Map a code address to a (filename, line number) pair. - Returns True if successful. -*/ -Bool VG_(what_line_is_this)( Addr a, - UChar* filename, Int n_filename, - UInt* lineno ) -{ - SegInfo* si; - Int locno; - search_all_loctabs ( a, &si, &locno ); - if (si == NULL) - return False; - VG_(strncpy_safely)(filename, & si->strtab[si->loctab[locno].fnmoff], - n_filename); - *lineno = si->loctab[locno].lineno; - - return True; -} - - -/* Print a mini stack dump, showing the current location. 
*/ -void VG_(mini_stack_dump) ( ExeContext* ec ) -{ - -#define APPEND(str) \ - { UChar* sss; \ - for (sss = str; n < M_VG_ERRTXT-1 && *sss != 0; n++,sss++) \ - buf[n] = *sss; \ - buf[n] = 0; \ - } - - Bool know_fnname; - Bool know_objname; - Bool know_srcloc; - UInt lineno; - UChar ibuf[20]; - UInt i, n; - - UChar buf[M_VG_ERRTXT]; - UChar buf_fn[M_VG_ERRTXT]; - UChar buf_obj[M_VG_ERRTXT]; - UChar buf_srcloc[M_VG_ERRTXT]; - - Int stop_at = VG_(clo_backtrace_size); - - n = 0; - - know_fnname = VG_(what_fn_is_this)(False,ec->eips[0], buf_fn, M_VG_ERRTXT); - know_objname = vg_what_object_is_this(ec->eips[0], buf_obj, M_VG_ERRTXT); - know_srcloc = VG_(what_line_is_this)(ec->eips[0], - buf_srcloc, M_VG_ERRTXT, - &lineno); - - APPEND(" at "); - VG_(sprintf)(ibuf,"0x%x: ", ec->eips[0]); - APPEND(ibuf); - if (know_fnname) { - APPEND(buf_fn); - if (!know_srcloc && know_objname) { - APPEND(" (in "); - APPEND(buf_obj); - APPEND(")"); - } - } else if (know_objname && !know_srcloc) { - APPEND("(within "); - APPEND(buf_obj); - APPEND(")"); - } else { - APPEND("???"); - } - if (know_srcloc) { - APPEND(" ("); - APPEND(buf_srcloc); - APPEND(":"); - VG_(sprintf)(ibuf,"%d",lineno); - APPEND(ibuf); - APPEND(")"); - } - VG_(message)(Vg_UserMsg, "%s", buf); - - for (i = 1; i < stop_at && ec->eips[i] != 0; i++) { - know_fnname = VG_(what_fn_is_this)(False,ec->eips[i], buf_fn, M_VG_ERRTXT); - know_objname = vg_what_object_is_this(ec->eips[i],buf_obj, M_VG_ERRTXT); - know_srcloc = VG_(what_line_is_this)(ec->eips[i], - buf_srcloc, M_VG_ERRTXT, - &lineno); - n = 0; - APPEND(" by "); - VG_(sprintf)(ibuf,"0x%x: ",ec->eips[i]); - APPEND(ibuf); - if (know_fnname) { - APPEND(buf_fn) - if (!know_srcloc && know_objname) { - APPEND(" (in "); - APPEND(buf_obj); - APPEND(")"); - } - } else { - if (know_objname && !know_srcloc) { - APPEND("(within "); - APPEND(buf_obj); - APPEND(")"); - } else { - APPEND("???"); - } - }; - if (know_srcloc) { - APPEND(" ("); - APPEND(buf_srcloc); - APPEND(":"); - 
VG_(sprintf)(ibuf,"%d",lineno); - APPEND(ibuf); - APPEND(")"); - } - VG_(message)(Vg_UserMsg, "%s", buf); - } -} - -#undef APPEND - -/*--------------------------------------------------------------------*/ -/*--- end vg_symtab2.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_syscall.S b/coregrind/vg_syscall.S deleted file mode 100644 index adabbedbbe..0000000000 --- a/coregrind/vg_syscall.S +++ /dev/null @@ -1,104 +0,0 @@ - -##--------------------------------------------------------------------## -##--- Support for doing system calls. ---## -##--- vg_syscall.S ---## -##--------------------------------------------------------------------## - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_constants.h" - - -.globl VG_(do_syscall) - -# NOTE that this routine expects the simulated machines state -# to be in m_state_static. Therefore it needs to be wrapped by -# code which copies from baseBlock before the call, into -# m_state_static, and back afterwards. 

VG_(do_syscall):
        # ---- Save the real machine's state ----
        # Integer registers go on the simulator's stack.
        pushal

        # The FPU state goes to a static area; it is reloaded straight
        # after being saved.
        fwait
        fnsave  VG_(real_fpu_state_saved_over_syscall)
        frstor  VG_(real_fpu_state_saved_over_syscall)

        # Remember the simulator's stack pointer.
        movl    %esp, VG_(esp_saved_over_syscall)

        # ---- Load the simulated machine's state from m_state_static ----
        # %esp still refers to the simulator's stack while the flags
        # word is moved through it.
        frstor  VG_(m_state_static)+40
        movl    VG_(m_state_static)+32, %eax
        pushl   %eax
        popfl
        movl    VG_(m_state_static)+0, %eax
        movl    VG_(m_state_static)+4, %ecx
        movl    VG_(m_state_static)+8, %edx
        movl    VG_(m_state_static)+12, %ebx
        movl    VG_(m_state_static)+16, %esp
        movl    VG_(m_state_static)+20, %ebp
        movl    VG_(m_state_static)+24, %esi
        movl    VG_(m_state_static)+28, %edi

        # %esp now refers to the simulatee's stack.
        # Do the actual system call.
        int     $0x80

        # Restore the stack as soon as possible: record the simulatee's
        # %esp, then switch back to the simulator's stack.
        movl    %esp, VG_(m_state_static)+16
        movl    VG_(esp_saved_over_syscall), %esp

        # ---- Copy the real state back into the simulated state ----
        movl    %eax, VG_(m_state_static)+0
        movl    %ecx, VG_(m_state_static)+4
        movl    %edx, VG_(m_state_static)+8
        movl    %ebx, VG_(m_state_static)+12
        movl    %ebp, VG_(m_state_static)+20
        movl    %esi, VG_(m_state_static)+24
        movl    %edi, VG_(m_state_static)+28
        pushfl
        popl    %eax
        movl    %eax, VG_(m_state_static)+32
        fwait
        fnsave  VG_(m_state_static)+40
        frstor  VG_(m_state_static)+40

        # ---- Restore the state of the simulator ----
        frstor  VG_(real_fpu_state_saved_over_syscall)
        popal

        ret

##--------------------------------------------------------------------##
##--- end                                             vg_syscall.S ---##
##--------------------------------------------------------------------##
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
deleted file mode 100644
index 179c0592f5..0000000000
--- a/coregrind/vg_to_ucode.c
+++ /dev/null
@@ -1,4674 +0,0 @@

/*--------------------------------------------------------------------*/
/*--- The JITter: translate x86 code to ucode.                     ---*/
/*---                                                vg_to_ucode.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an x86 protected-mode emulator
   designed for debugging and profiling binaries on x86-Unixes.

   Copyright (C) 2000-2002 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.
- - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - - -/*------------------------------------------------------------*/ -/*--- Renamings of frequently-used global functions. ---*/ -/*------------------------------------------------------------*/ - -#define uInstr0 VG_(newUInstr0) -#define uInstr1 VG_(newUInstr1) -#define uInstr2 VG_(newUInstr2) -#define uInstr3 VG_(newUInstr3) -#define dis VG_(disassemble) -#define nameIReg VG_(nameOfIntReg) -#define nameISize VG_(nameOfIntSize) -#define newTemp VG_(getNewTemp) -#define uLiteral VG_(setLiteralField) - - -/*------------------------------------------------------------*/ -/*--- Here so it can be inlined everywhere. ---*/ -/*------------------------------------------------------------*/ - -/* Allocate a new temp reg number. */ -__inline__ Int VG_(getNewTemp) ( UCodeBlock* cb ) -{ - Int t = cb->nextTemp; - cb->nextTemp += 2; - return t; -} - -Int VG_(getNewShadow) ( UCodeBlock* cb ) -{ - Int t = cb->nextTemp; - cb->nextTemp += 2; - return SHADOW(t); -} - -/* Handy predicates. */ -#define SMC_IF_SOME(cb) \ - do { \ - if (VG_(clo_smc_check) >= VG_CLO_SMC_SOME) { \ - LAST_UINSTR((cb)).smc_check = True; \ - } \ - } while (0) - -#define SMC_IF_ALL(cb) \ - do { \ - if (VG_(clo_smc_check) == VG_CLO_SMC_ALL) { \ - LAST_UINSTR((cb)).smc_check = True; \ - } \ - } while (0) - - -/*------------------------------------------------------------*/ -/*--- Helper bits and pieces for deconstructing the ---*/ -/*--- x86 insn stream. 
---*/ -/*------------------------------------------------------------*/ - -static Char* nameGrp1 ( Int opc_aux ) -{ - static Char* grp1_names[8] - = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }; - if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp1"); - return grp1_names[opc_aux]; -} - -static Char* nameGrp2 ( Int opc_aux ) -{ - static Char* grp2_names[8] - = { "rol", "ror", "rcl", "rcr", "shl", "shr", "shl", "sar" }; - if (opc_aux < 0 || opc_aux > 7) VG_(panic)("nameGrp2"); - return grp2_names[opc_aux]; -} - -static Char* nameGrp4 ( Int opc_aux ) -{ - static Char* grp4_names[8] - = { "inc", "dec", "???", "???", "???", "???", "???", "???" }; - if (opc_aux < 0 || opc_aux > 1) VG_(panic)("nameGrp4"); - return grp4_names[opc_aux]; -} - -static Char* nameGrp5 ( Int opc_aux ) -{ - static Char* grp5_names[8] - = { "inc", "dec", "call*", "call*", "jmp*", "jmp*", "push", "???" }; - if (opc_aux < 0 || opc_aux > 6) VG_(panic)("nameGrp5"); - return grp5_names[opc_aux]; -} - -static Char* nameGrp8 ( Int opc_aux ) -{ - static Char* grp8_names[8] - = { "???", "???", "???", "???", "bt", "bts", "btr", "btc" }; - if (opc_aux < 4 || opc_aux > 7) VG_(panic)("nameGrp8"); - return grp8_names[opc_aux]; -} - -Char* VG_(nameOfIntReg) ( Int size, Int reg ) -{ - static Char* ireg32_names[8] - = { "%eax", "%ecx", "%edx", "%ebx", - "%esp", "%ebp", "%esi", "%edi" }; - static Char* ireg16_names[8] - = { "%ax", "%cx", "%dx", "%bx", "%sp", "%bp", "%si", "%di" }; - static Char* ireg8_names[8] - = { "%al", "%cl", "%dl", "%bl", "%ah{sp}", "%ch{bp}", "%dh{si}", "%bh{di}" }; - if (reg < 0 || reg > 7) goto bad; - switch (size) { - case 4: return ireg32_names[reg]; - case 2: return ireg16_names[reg]; - case 1: return ireg8_names[reg]; - } - bad: - VG_(panic)("nameOfIntReg"); - return NULL; /*notreached*/ -} - -Char VG_(nameOfIntSize) ( Int size ) -{ - switch (size) { - case 4: return 'l'; - case 2: return 'w'; - case 1: return 'b'; - default: VG_(panic)("nameOfIntSize"); - } -} - 
-__inline__ UInt VG_(extend_s_8to32) ( UInt x ) -{ - return (UInt)((((Int)x) << 24) >> 24); -} - -__inline__ static UInt extend_s_16to32 ( UInt x ) -{ - return (UInt)((((Int)x) << 16) >> 16); -} - - -/* Get a byte value out of the insn stream and sign-extend to 32 - bits. */ -__inline__ static UInt getSDisp8 ( Addr eip0 ) -{ - UChar* eip = (UChar*)eip0; - return VG_(extend_s_8to32)( (UInt) (eip[0]) ); -} - -__inline__ static UInt getSDisp16 ( Addr eip0 ) -{ - UChar* eip = (UChar*)eip0; - UInt d = *eip++; - d |= ((*eip++) << 8); - return extend_s_16to32(d); -} - -/* Get a 32-bit value out of the insn stream. */ -__inline__ static UInt getUDisp32 ( Addr eip0 ) -{ - UChar* eip = (UChar*)eip0; - UInt v = eip[3]; v <<= 8; - v |= eip[2]; v <<= 8; - v |= eip[1]; v <<= 8; - v |= eip[0]; - return v; -} - -__inline__ static UInt getUDisp16 ( Addr eip0 ) -{ - UChar* eip = (UChar*)eip0; - UInt v = eip[1]; v <<= 8; - v |= eip[0]; - return v; -} - -__inline__ static UChar getUChar ( Addr eip0 ) -{ - UChar* eip = (UChar*)eip0; - return eip[0]; -} - -__inline__ static UInt LOW24 ( UInt x ) -{ - return x & 0x00FFFFFF; -} - -__inline__ static UInt HI8 ( UInt x ) -{ - return x >> 24; -} - -__inline__ static UInt getUDisp ( Int size, Addr eip ) -{ - switch (size) { - case 4: return getUDisp32(eip); - case 2: return getUDisp16(eip); - case 1: return getUChar(eip); - default: VG_(panic)("getUDisp"); - } - return 0; /*notreached*/ -} - -__inline__ static UInt getSDisp ( Int size, Addr eip ) -{ - switch (size) { - case 4: return getUDisp32(eip); - case 2: return getSDisp16(eip); - case 1: return getSDisp8(eip); - default: VG_(panic)("getUDisp"); - } - return 0; /*notreached*/ -} - - -/*------------------------------------------------------------*/ -/*--- Flag-related helpers. ---*/ -/*------------------------------------------------------------*/ - -/* For the last uinsn inserted into cb, set the read, written and - undefined flags. 
Undefined flags are counted as written, but it - seems worthwhile to distinguish them. -*/ -static __inline__ void uFlagsRWU ( UCodeBlock* cb, - FlagSet rr, FlagSet ww, FlagSet uu ) -{ - VG_(setFlagRW)( - &LAST_UINSTR(cb), rr, VG_UNION_FLAG_SETS(ww,uu) - ); -} - - -static void setFlagsFromUOpcode ( UCodeBlock* cb, Int uopc ) -{ - switch (uopc) { - case XOR: case OR: case AND: - uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; - case ADC: case SBB: - uFlagsRWU(cb, FlagC, FlagsOSZACP, FlagsEmpty); break; - case ADD: case SUB: case NEG: - uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); break; - case INC: case DEC: - uFlagsRWU(cb, FlagsEmpty, FlagsOSZAP, FlagsEmpty); break; - case SHR: case SAR: case SHL: - uFlagsRWU(cb, FlagsEmpty, FlagsOSZCP, FlagA); break; - case ROL: case ROR: - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsEmpty); break; - case RCR: case RCL: - uFlagsRWU(cb, FlagC, FlagsOC, FlagsEmpty); break; - case NOT: - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); break; - default: - VG_(printf)("unhandled case is %s\n", - VG_(nameUOpcode)(True, uopc)); - VG_(panic)("setFlagsFromUOpcode: unhandled case"); - } -} - -static __inline__ void uCond ( UCodeBlock* cb, Condcode cond ) -{ - LAST_UINSTR(cb).cond = cond; -} - - -/*------------------------------------------------------------*/ -/*--- Disassembling addressing modes ---*/ -/*------------------------------------------------------------*/ - -/* Generate ucode to calculate an address indicated by a ModRM and - following SIB bytes, getting the value in a new temporary. The - temporary, and the number of bytes in the address mode, are - returned, as a pair (length << 8) | temp. Note that this fn should - not be called if the R/M part of the address denotes a register - instead of memory. If buf is non-NULL, text of the addressing mode - is placed therein. 
*/ - -static UInt disAMode ( UCodeBlock* cb, Addr eip0, UChar* buf ) -{ - UChar* eip = (UChar*)eip0; - UChar mod_reg_rm = *eip++; - Int tmp = newTemp(cb); - - /* squeeze out the reg field from mod_reg_rm, since a 256-entry - jump table seems a bit excessive. - */ - mod_reg_rm &= 0xC7; /* is now XX000YYY */ - mod_reg_rm |= (mod_reg_rm >> 3); /* is now XX0XXYYY */ - mod_reg_rm &= 0x1F; /* is now 000XXYYY */ - switch (mod_reg_rm) { - - /* (%eax) .. (%edi), not including (%esp) or (%ebp). - --> GET %reg, t - */ - case 0x00: case 0x01: case 0x02: case 0x03: - /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: - { UChar rm = mod_reg_rm; - uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmp); - if (buf) VG_(sprintf)(buf,"(%s)", nameIReg(4,rm)); - return (1<<24 | tmp); - } - - /* d8(%eax) ... d8(%edi), not including d8(%esp) - --> GET %reg, t ; ADDL d8, t - */ - case 0x08: case 0x09: case 0x0A: case 0x0B: - /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: - { UChar rm = mod_reg_rm & 7; - Int tmq = newTemp(cb); - UInt d = getSDisp8((Addr)eip); eip++; - uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); - uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - if (buf) VG_(sprintf)(buf,"%d(%s)", d, nameIReg(4,rm)); - return (2<<24 | tmp); - } - - /* d32(%eax) ... d32(%edi), not including d32(%esp) - --> GET %reg, t ; ADDL d8, t - */ - case 0x10: case 0x11: case 0x12: case 0x13: - /* ! 14 */ case 0x15: case 0x16: case 0x17: - { UChar rm = mod_reg_rm & 7; - Int tmq = newTemp(cb); - UInt d = getUDisp32((Addr)eip); eip += 4; - uInstr2(cb, GET, 4, ArchReg, rm, TempReg, tmq); - uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - if (buf) VG_(sprintf)(buf,"0x%x(%s)", d, nameIReg(4,rm)); - return (5<<24 | tmp); - } - - /* a register, %eax .. %edi. This shouldn't happen. 
*/ - case 0x18: case 0x19: case 0x1A: case 0x1B: - case 0x1C: case 0x1D: case 0x1E: case 0x1F: - VG_(panic)("disAMode: not an addr!"); - - /* a 32-bit literal address - --> MOV d32, tmp - */ - case 0x05: - { UInt d = getUDisp32((Addr)eip); eip += 4; - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tmp); - uLiteral(cb, d); - if (buf) VG_(sprintf)(buf,"(0x%x)", d); - return (5<<24 | tmp); - } - - case 0x04: { - /* SIB, with no displacement. Special cases: - -- %esp cannot act as an index value. - If index_r indicates %esp, zero is used for the index. - -- when mod is zero and base indicates EBP, base is instead - a 32-bit literal. - It's all madness, I tell you. Extract %index, %base and - scale from the SIB byte. The value denoted is then: - | %index == %ESP && %base == %EBP - = d32 following SIB byte - | %index == %ESP && %base != %EBP - = %base - | %index != %ESP && %base == %EBP - = d32 following SIB byte + (%index << scale) - | %index != %ESP && %base != %ESP - = %base + (%index << scale) - - What happens to the souls of CPU architects who dream up such - horrendous schemes, do you suppose? 
- */ - UChar sib = *eip++; - UChar scale = (sib >> 6) & 3; - UChar index_r = (sib >> 3) & 7; - UChar base_r = sib & 7; - - if (index_r != R_ESP && base_r != R_EBP) { - Int index_tmp = newTemp(cb); - Int base_tmp = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); - uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); - uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, - TempReg, tmp); - LAST_UINSTR(cb).lit32 = 0; - LAST_UINSTR(cb).extra4b = 1 << scale; - if (buf) VG_(sprintf)(buf,"(%s,%s,%d)", nameIReg(4,base_r), - nameIReg(4,index_r),1<> 6) & 3; - UChar index_r = (sib >> 3) & 7; - UChar base_r = sib & 7; - UInt d = getSDisp8((Addr)eip); eip++; - - if (index_r == R_ESP) { - Int tmq = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); - uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); - return (3<<24 | tmp); - } else { - Int index_tmp = newTemp(cb); - Int base_tmp = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); - uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); - uInstr3(cb, LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, - TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - LAST_UINSTR(cb).extra4b = 1 << scale; - if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), - nameIReg(4,index_r), 1<> 6) & 3; - UChar index_r = (sib >> 3) & 7; - UChar base_r = sib & 7; - UInt d = getUDisp32((Addr)eip); eip += 4; - - if (index_r == R_ESP) { - Int tmq = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, tmq); - uInstr2(cb, LEA1, 4, TempReg, tmq, TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - if (buf) VG_(sprintf)(buf,"%d(%s,,)", d, nameIReg(4,base_r)); - return (6<<24 | tmp); - } else { - Int index_tmp = newTemp(cb); - Int base_tmp = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, index_r, TempReg, index_tmp); - uInstr2(cb, GET, 4, ArchReg, base_r, TempReg, base_tmp); - uInstr3(cb, 
LEA2, 4, TempReg, base_tmp, TempReg, index_tmp, - TempReg, tmp); - LAST_UINSTR(cb).lit32 = d; - LAST_UINSTR(cb).extra4b = 1 << scale; - if (buf) VG_(sprintf)(buf,"%d(%s,%s,%d)", d, nameIReg(4,base_r), - nameIReg(4,index_r), 1<> 3); /* is now XX0XXYYY */ - mod_reg_rm &= 0x1F; /* is now 000XXYYY */ - switch (mod_reg_rm) { - - /* (%eax) .. (%edi), not including (%esp) or (%ebp). */ - case 0x00: case 0x01: case 0x02: case 0x03: - /* ! 04 */ /* ! 05 */ case 0x06: case 0x07: - return 1; - - /* d8(%eax) ... d8(%edi), not including d8(%esp). */ - case 0x08: case 0x09: case 0x0A: case 0x0B: - /* ! 0C */ case 0x0D: case 0x0E: case 0x0F: - return 2; - - /* d32(%eax) ... d32(%edi), not including d32(%esp). */ - case 0x10: case 0x11: case 0x12: case 0x13: - /* ! 14 */ case 0x15: case 0x16: case 0x17: - return 5; - - /* a register, %eax .. %edi. (Not an addr, but still handled.) */ - case 0x18: case 0x19: case 0x1A: case 0x1B: - case 0x1C: case 0x1D: case 0x1E: case 0x1F: - return 1; - - /* a 32-bit literal address. */ - case 0x05: return 5; - - /* SIB, no displacement. */ - case 0x04: { - UChar sib = *eip++; - UChar base_r = sib & 7; - if (base_r == R_EBP) return 6; else return 2; - } - /* SIB, with 8-bit displacement. */ - case 0x0C: return 3; - - /* SIB, with 32-bit displacement. */ - case 0x14: return 6; - - default: - VG_(panic)("amode_from_RM"); - return 0; /*notreached*/ - } -} - - -/* Extract the reg field from a modRM byte. */ -static __inline__ Int gregOfRM ( UChar mod_reg_rm ) -{ - return (Int)( (mod_reg_rm >> 3) & 7 ); -} - -/* Figure out whether the mod and rm parts of a modRM byte refer to a - register or memory. If so, the byte will have the form 11XXXYYY, - where YYY is the register number. */ -static __inline__ Bool epartIsReg ( UChar mod_reg_rm ) -{ - return (0xC0 == (mod_reg_rm & 0xC0)); -} - -/* ... and extract the register number ... 
*/ -static __inline__ Int eregOfRM ( UChar mod_reg_rm ) -{ - return (Int)(mod_reg_rm & 0x7); -} - - -/*------------------------------------------------------------*/ -/*--- Disassembling common idioms ---*/ -/*------------------------------------------------------------*/ - -static -void codegen_XOR_reg_with_itself ( UCodeBlock* cb, Int size, - Int ge_reg, Int tmp ) -{ - if (dis) - VG_(printf)("xor%c %s, %s\n", nameISize(size), - nameIReg(size,ge_reg), nameIReg(size,ge_reg) ); - uInstr2(cb, MOV, size, Literal, 0, TempReg, tmp); - uLiteral(cb, 0); - uInstr2(cb, XOR, size, TempReg, tmp, TempReg, tmp); - setFlagsFromUOpcode(cb, XOR); - uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, ge_reg); -} - - -/* Handle binary integer instructions of the form - op E, G meaning - op reg-or-mem, reg - Is passed the a ptr to the modRM byte, the actual operation, and the - data size. Returns the address advanced completely over this - instruction. - - E(src) is reg-or-mem - G(dst) is reg. - - If E is reg, --> GET %G, tmp - OP %E, tmp - PUT tmp, %G - - If E is mem and OP is not reversible, - --> (getAddr E) -> tmpa - LD (tmpa), tmpa - GET %G, tmp2 - OP tmpa, tmp2 - PUT tmp2, %G - - If E is mem and OP is reversible - --> (getAddr E) -> tmpa - LD (tmpa), tmpa - OP %G, tmpa - PUT tmpa, %G -*/ -static -Addr dis_op2_E_G ( UCodeBlock* cb, - Opcode opc, - Bool keep, - Int size, - Addr eip0, - Char* t_x86opc ) -{ - Bool reversible; - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - if (epartIsReg(rm)) { - Int tmp = newTemp(cb); - - /* Specially handle XOR reg,reg, because that doesn't really - depend on reg, and doing the obvious thing potentially - generates a spurious value check failure due to the bogus - dependency. 
*/ - if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { - codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); - return 1+eip0; - } - - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp); - if (opc == AND || opc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tao); - uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); - setFlagsFromUOpcode(cb, opc); - } else { - uInstr2(cb, opc, size, ArchReg, eregOfRM(rm), TempReg, tmp); - setFlagsFromUOpcode(cb, opc); - } - if (keep) - uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), - nameIReg(size,eregOfRM(rm)), - nameIReg(size,gregOfRM(rm))); - return 1+eip0; - } - - /* E refers to memory */ - reversible - = (opc == ADD || opc == OR || opc == AND || opc == XOR || opc == ADC) - ? True : False; - if (reversible) { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); - - if (opc == AND || opc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); - uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpa); - setFlagsFromUOpcode(cb, opc); - } else { - uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpa); - setFlagsFromUOpcode(cb, opc); - } - if (keep) - uInstr2(cb, PUT, size, TempReg, tmpa, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), - dis_buf,nameIReg(size,gregOfRM(rm))); - return HI8(pair)+eip0; - } else { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - Int tmp2 = newTemp(cb); - uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpa); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmp2); - uInstr2(cb, opc, size, TempReg, tmpa, TempReg, tmp2); - setFlagsFromUOpcode(cb, opc); - if (keep) - uInstr2(cb, PUT, size, TempReg, tmp2, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("%s%c 
%s,%s\n", t_x86opc, nameISize(size), - dis_buf,nameIReg(size,gregOfRM(rm))); - return HI8(pair)+eip0; - } -} - - - -/* Handle binary integer instructions of the form - op G, E meaning - op reg, reg-or-mem - Is passed the a ptr to the modRM byte, the actual operation, and the - data size. Returns the address advanced completely over this - instruction. - - G(src) is reg. - E(dst) is reg-or-mem - - If E is reg, --> GET %E, tmp - OP %G, tmp - PUT tmp, %E - - If E is mem, --> (getAddr E) -> tmpa - LD (tmpa), tmpv - OP %G, tmpv - ST tmpv, (tmpa) -*/ -static -Addr dis_op2_G_E ( UCodeBlock* cb, - Opcode opc, - Bool keep, - Int size, - Addr eip0, - Char* t_x86opc ) -{ - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - if (epartIsReg(rm)) { - Int tmp = newTemp(cb); - - /* Specially handle XOR reg,reg, because that doesn't really - depend on reg, and doing the obvious thing potentially - generates a spurious value check failure due to the bogus - dependency. */ - if (opc == XOR && gregOfRM(rm) == eregOfRM(rm)) { - codegen_XOR_reg_with_itself ( cb, size, gregOfRM(rm), tmp ); - return 1+eip0; - } - - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmp); - - if (opc == AND || opc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tao); - uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); - setFlagsFromUOpcode(cb, opc); - } else { - uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmp); - setFlagsFromUOpcode(cb, opc); - } - if (keep) - uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, eregOfRM(rm)); - if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), - nameIReg(size,gregOfRM(rm)), - nameIReg(size,eregOfRM(rm))); - return 1+eip0; - } - - /* E refers to memory */ - { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - Int tmpv = newTemp(cb); - uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpv); - - if (opc == AND || opc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, GET, size, 
ArchReg, gregOfRM(rm), TempReg, tao); - uInstr2(cb, opc, size, TempReg, tao, TempReg, tmpv); - setFlagsFromUOpcode(cb, opc); - } else { - uInstr2(cb, opc, size, ArchReg, gregOfRM(rm), TempReg, tmpv); - setFlagsFromUOpcode(cb, opc); - } - if (keep) { - uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); - SMC_IF_ALL(cb); - } - if (dis) VG_(printf)("%s%c %s,%s\n", t_x86opc, nameISize(size), - nameIReg(size,gregOfRM(rm)), dis_buf); - return HI8(pair)+eip0; - } -} - - -/* Handle move instructions of the form - mov E, G meaning - mov reg-or-mem, reg - Is passed the a ptr to the modRM byte, and the data size. Returns - the address advanced completely over this instruction. - - E(src) is reg-or-mem - G(dst) is reg. - - If E is reg, --> GET %G, tmpv - PUT tmpv, %G - - If E is mem --> (getAddr E) -> tmpa - LD (tmpa), tmpb - PUT tmpb, %G -*/ -static -Addr dis_mov_E_G ( UCodeBlock* cb, - Int size, - Addr eip0 ) -{ - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - if (epartIsReg(rm)) { - Int tmpv = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmpv); - uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), - nameIReg(size,eregOfRM(rm)), - nameIReg(size,gregOfRM(rm))); - return 1+eip0; - } - - /* E refers to memory */ - { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - Int tmpb = newTemp(cb); - uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmpb); - uInstr2(cb, PUT, size, TempReg, tmpb, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), - dis_buf,nameIReg(size,gregOfRM(rm))); - return HI8(pair)+eip0; - } -} - - -/* Handle move instructions of the form - mov G, E meaning - mov reg, reg-or-mem - Is passed the a ptr to the modRM byte, and the data size. Returns - the address advanced completely over this instruction. - - G(src) is reg. 
- E(dst) is reg-or-mem - - If E is reg, --> GET %G, tmp - PUT tmp, %E - - If E is mem, --> (getAddr E) -> tmpa - GET %G, tmpv - ST tmpv, (tmpa) -*/ -static -Addr dis_mov_G_E ( UCodeBlock* cb, - Int size, - Addr eip0 ) -{ - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - if (epartIsReg(rm)) { - Int tmpv = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); - uInstr2(cb, PUT, size, TempReg, tmpv, ArchReg, eregOfRM(rm)); - if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), - nameIReg(size,gregOfRM(rm)), - nameIReg(size,eregOfRM(rm))); - return 1+eip0; - } - - /* E refers to memory */ - { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - Int tmpv = newTemp(cb); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpv); - uInstr2(cb, STORE, size, TempReg, tmpv, TempReg, tmpa); - SMC_IF_SOME(cb); - if (dis) VG_(printf)("mov%c %s,%s\n", nameISize(size), - nameIReg(size,gregOfRM(rm)), dis_buf); - return HI8(pair)+eip0; - } -} - - -/* op $immediate, AL/AX/EAX. */ -static -Addr dis_op_imm_A ( UCodeBlock* cb, - Int size, - Opcode opc, - Bool keep, - Addr eip, - Char* t_x86opc ) -{ - Int tmp = newTemp(cb); - UInt lit = getUDisp(size,eip); - uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, tmp); - if (opc == AND || opc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, MOV, size, Literal, 0, TempReg, tao); - uLiteral(cb, lit); - uInstr2(cb, opc, size, TempReg, tao, TempReg, tmp); - setFlagsFromUOpcode(cb, opc); - } else { - uInstr2(cb, opc, size, Literal, 0, TempReg, tmp); - uLiteral(cb, lit); - setFlagsFromUOpcode(cb, opc); - } - if (keep) - uInstr2(cb, PUT, size, TempReg, tmp, ArchReg, R_EAX); - if (dis) VG_(printf)("%s%c $0x%x, %s\n", t_x86opc, nameISize(size), - lit, nameIReg(size,R_EAX)); - return eip+size; -} - - -/* Sign- and Zero-extending moves. 
*/ -static -Addr dis_movx_E_G ( UCodeBlock* cb, - Addr eip, Int szs, Int szd, Bool sign_extend ) -{ - UChar dis_buf[50]; - UChar rm = getUChar(eip); - if (epartIsReg(rm)) { - Int tmpv = newTemp(cb); - uInstr2(cb, GET, szs, ArchReg, eregOfRM(rm), TempReg, tmpv); - uInstr1(cb, WIDEN, szd, TempReg, tmpv); - LAST_UINSTR(cb).extra4b = szs; - LAST_UINSTR(cb).signed_widen = sign_extend; - uInstr2(cb, PUT, szd, TempReg, tmpv, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("mov%c%c%c %s,%s\n", - sign_extend ? 's' : 'z', - nameISize(szs), nameISize(szd), - nameIReg(szs,eregOfRM(rm)), - nameIReg(szd,gregOfRM(rm))); - return 1+eip; - } - - /* E refers to memory */ - { - UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - uInstr2(cb, LOAD, szs, TempReg, tmpa, TempReg, tmpa); - uInstr1(cb, WIDEN, szd, TempReg, tmpa); - LAST_UINSTR(cb).extra4b = szs; - LAST_UINSTR(cb).signed_widen = sign_extend; - uInstr2(cb, PUT, szd, TempReg, tmpa, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("mov%c%c%c %s,%s\n", - sign_extend ? 's' : 'z', - nameISize(szs), nameISize(szd), - dis_buf, - nameIReg(szd,gregOfRM(rm))); - return HI8(pair)+eip; - } -} - - -/* Generate code to divide ArchRegs EDX:EAX / DX:AX / AX by the 32 / - 16 / 8 bit quantity in the given TempReg. */ -static -void codegen_div ( UCodeBlock* cb, Int sz, Int t, Bool signed_divide ) -{ - Int helper; - Int ta = newTemp(cb); - Int td = newTemp(cb); - - switch (sz) { - case 4: helper = (signed_divide ? VGOFF_(helper_idiv_64_32) - : VGOFF_(helper_div_64_32)); - break; - case 2: helper = (signed_divide ? VGOFF_(helper_idiv_32_16) - : VGOFF_(helper_div_32_16)); - break; - case 1: helper = (signed_divide ? 
VGOFF_(helper_idiv_16_8) - : VGOFF_(helper_div_16_8)); - break; - default: VG_(panic)("codegen_div"); - } - uInstr0(cb, CALLM_S, 0); - if (sz == 4 || sz == 2) { - uInstr1(cb, PUSH, sz, TempReg, t); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr1(cb, PUSH, sz, TempReg, ta); - uInstr2(cb, GET, sz, ArchReg, R_EDX, TempReg, td); - uInstr1(cb, PUSH, sz, TempReg, td); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP); - uInstr1(cb, POP, sz, TempReg, t); - uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EDX); - uInstr1(cb, POP, sz, TempReg, t); - uInstr2(cb, PUT, sz, TempReg, t, ArchReg, R_EAX); - uInstr1(cb, CLEAR, 0, Lit16, 4); - } else { - uInstr1(cb, PUSH, 1, TempReg, t); - uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, ta); - uInstr1(cb, PUSH, 2, TempReg, ta); - uInstr2(cb, MOV, 1, Literal, 0, TempReg, td); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 1, TempReg, td); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsOSZACP); - uInstr1(cb, POP, 1, TempReg, t); - uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AL); - uInstr1(cb, POP, 1, TempReg, t); - uInstr2(cb, PUT, 1, TempReg, t, ArchReg, R_AH); - uInstr1(cb, CLEAR, 0, Lit16, 4); - } - uInstr0(cb, CALLM_E, 0); -} - - -static -Addr dis_Grp1 ( UCodeBlock* cb, Addr eip, UChar modrm, - Int am_sz, Int d_sz, Int sz, UInt d32 ) -{ - Int t1, t2, uopc; - UInt pair; - UChar dis_buf[50]; - if (epartIsReg(modrm)) { - vg_assert(am_sz == 1); - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: uopc = ADD; break; case 1: uopc = OR; break; - case 2: uopc = ADC; break; case 3: uopc = SBB; break; - case 4: uopc = AND; break; case 5: uopc = SUB; break; - case 6: uopc = XOR; break; case 7: uopc = SUB; break; - default: VG_(panic)("dis_Grp1(Reg): unhandled case"); - } - if (uopc == AND || uopc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); - uLiteral(cb, 
d32); - uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t1); - setFlagsFromUOpcode(cb, uopc); - } else { - uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1); - uLiteral(cb, d32); - setFlagsFromUOpcode(cb, uopc); - } - if (gregOfRM(modrm) < 7) - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - eip += (am_sz + d_sz); - if (dis) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, - nameIReg(sz,eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL); - t1 = LOW24(pair); - t2 = newTemp(cb); - eip += HI8(pair); - eip += d_sz; - uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); - switch (gregOfRM(modrm)) { - case 0: uopc = ADD; break; case 1: uopc = OR; break; - case 2: uopc = ADC; break; case 3: uopc = SBB; break; - case 4: uopc = AND; break; case 5: uopc = SUB; break; - case 6: uopc = XOR; break; case 7: uopc = SUB; break; - default: VG_(panic)("dis_Grp1(Mem): unhandled case"); - } - if (uopc == AND || uopc == OR) { - Int tao = newTemp(cb); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); - uLiteral(cb, d32); - uInstr2(cb, uopc, sz, TempReg, tao, TempReg, t2); - setFlagsFromUOpcode(cb, uopc); - } else { - uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2); - uLiteral(cb, d32); - setFlagsFromUOpcode(cb, uopc); - } - if (gregOfRM(modrm) < 7) { - uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); - SMC_IF_ALL(cb); - } - if (dis) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp1(gregOfRM(modrm)), nameISize(sz), d32, - dis_buf); - } - return eip; -} - - -/* Group 2 extended opcodes. */ -static -Addr dis_Grp2 ( UCodeBlock* cb, Addr eip, UChar modrm, - Int am_sz, Int d_sz, Int sz, - Tag orig_src_tag, UInt orig_src_val ) -{ - /* orig_src_tag and orig_src_val denote either ArchReg(%CL) or a - Literal. And eip on entry points at the modrm byte. */ - Int t1, t2, uopc; - UInt pair; - UChar dis_buf[50]; - UInt src_val; - Tag src_tag; - - /* Get the amount to be shifted by into src_tag/src_val. 
*/ - if (orig_src_tag == ArchReg) { - src_val = newTemp(cb); - src_tag = TempReg; - uInstr2(cb, GET, 1, orig_src_tag, orig_src_val, TempReg, src_val); - } else { - src_val = orig_src_val; - src_tag = Literal; - } - - if (epartIsReg(modrm)) { - vg_assert(am_sz == 1); - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: uopc = ROL; break; case 1: uopc = ROR; break; - case 2: uopc = RCL; break; case 3: uopc = RCR; break; - case 4: uopc = SHL; break; case 5: uopc = SHR; break; - case 7: uopc = SAR; break; - default: VG_(panic)("dis_Grp2(Reg): unhandled case"); - } - if (src_tag == Literal) { - uInstr2(cb, uopc, sz, Literal, 0, TempReg, t1); - uLiteral(cb, src_val); - } else { - uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t1); - } - setFlagsFromUOpcode(cb, uopc); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - eip += (am_sz + d_sz); - if (dis) { - if (orig_src_tag == Literal) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp2(gregOfRM(modrm)), nameISize(sz), - orig_src_val, nameIReg(sz,eregOfRM(modrm))); - else - VG_(printf)("%s%c %s, %s\n", - nameGrp2(gregOfRM(modrm)), nameISize(sz), - nameIReg(1,orig_src_val), - nameIReg(sz,eregOfRM(modrm))); - } - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL); - t1 = LOW24(pair); - t2 = newTemp(cb); - eip += HI8(pair); - eip += d_sz; - uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); - switch (gregOfRM(modrm)) { - case 0: uopc = ROL; break; case 1: uopc = ROR; break; - case 2: uopc = RCL; break; case 3: uopc = RCR; break; - case 4: uopc = SHL; break; case 5: uopc = SHR; break; - case 7: uopc = SAR; break; - default: VG_(panic)("dis_Grp2(Reg): unhandled case"); - } - if (src_tag == Literal) { - uInstr2(cb, uopc, sz, Literal, 0, TempReg, t2); - uLiteral(cb, src_val); - } else { - uInstr2(cb, uopc, sz, src_tag, src_val, TempReg, t2); - } - setFlagsFromUOpcode(cb, uopc); - uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); - SMC_IF_ALL(cb); - if 
(dis) { - if (orig_src_tag == Literal) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp2(gregOfRM(modrm)), nameISize(sz), - orig_src_val, dis_buf); - else - VG_(printf)("%s%c %s, %s\n", - nameGrp2(gregOfRM(modrm)), nameISize(sz), - nameIReg(1,orig_src_val), - dis_buf); - } - } - return eip; -} - - - -/* Group 8 extended opcodes (but BT/BTS/BTC/BTR only). */ -static -Addr dis_Grp8_BT ( UCodeBlock* cb, Addr eip, UChar modrm, - Int am_sz, Int sz, UInt src_val ) -{ -# define MODIFY_t2_AND_SET_CARRY_FLAG \ - /* t2 is the value to be op'd on. Copy to t_fetched, then \ - modify t2, if non-BT. */ \ - uInstr2(cb, MOV, 4, TempReg, t2, TempReg, t_fetched); \ - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t_mask); \ - uLiteral(cb, v_mask); \ - switch (gregOfRM(modrm)) { \ - case 4: /* BT */ break; \ - case 5: /* BTS */ \ - uInstr2(cb, OR, sz, TempReg, t_mask, TempReg, t2); break; \ - case 6: /* BTR */ \ - uInstr2(cb, AND, sz, TempReg, t_mask, TempReg, t2); break; \ - case 7: /* BTC */ \ - uInstr2(cb, XOR, sz, TempReg, t_mask, TempReg, t2); break; \ - } \ - /* Copy relevant bit from t_fetched into carry flag. */ \ - uInstr2(cb, SHR, sz, Literal, 0, TempReg, t_fetched); \ - uLiteral(cb, src_val); \ - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t_mask); \ - uLiteral(cb, 1); \ - uInstr2(cb, AND, sz, TempReg, t_mask, TempReg, t_fetched); \ - uInstr1(cb, NEG, sz, TempReg, t_fetched); \ - setFlagsFromUOpcode(cb, NEG); - - - /* src_val denotes a d8. - And eip on entry points at the modrm byte. */ - Int t1, t2, t_fetched, t_mask; - UInt pair; - UChar dis_buf[50]; - UInt v_mask; - - /* There is no 1-byte form of this instruction, AFAICS. */ - vg_assert(sz == 2 || sz == 4); - - /* Limit src_val -- the bit offset -- to something within a word. - The Intel docs say that literal offsets larger than a word are - masked in this way. 
*/ - switch (sz) { - case 2: src_val &= 15; break; - case 4: src_val &= 31; break; - default: VG_(panic)("dis_Grp8_BT: invalid size"); - } - - /* Invent a mask suitable for the operation. */ - - switch (gregOfRM(modrm)) { - case 4: /* BT */ v_mask = 0; break; - case 5: /* BTS */ v_mask = 1 << src_val; break; - case 6: /* BTR */ v_mask = ~(1 << src_val); break; - case 7: /* BTC */ v_mask = 1 << src_val; break; - /* If this needs to be extended, probably simplest to make a - new function to handle the other cases (0 .. 3). The - Intel docs do however not indicate any use for 0 .. 3, so - we don't expect this to happen. */ - default: VG_(panic)("dis_Grp8_BT"); - } - /* Probably excessively paranoid. */ - if (sz == 2) - v_mask &= 0x0000FFFF; - - t1 = INVALID_TEMPREG; - t_fetched = newTemp(cb); - t_mask = newTemp(cb); - - if (epartIsReg(modrm)) { - vg_assert(am_sz == 1); - t2 = newTemp(cb); - - /* Fetch the value to be tested and modified. */ - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); - /* Do it! */ - MODIFY_t2_AND_SET_CARRY_FLAG; - /* Dump the result back, if non-BT. */ - if (gregOfRM(modrm) != 4 /* BT */) - uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm)); - - eip += (am_sz + 1); - if (dis) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp8(gregOfRM(modrm)), nameISize(sz), - src_val, - nameIReg(sz,eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL); - t1 = LOW24(pair); - t2 = newTemp(cb); - eip += HI8(pair); - eip += 1; - - /* Fetch the value to be tested and modified. */ - uInstr2(cb, LOAD, sz, TempReg, t1, TempReg, t2); - /* Do it! */ - MODIFY_t2_AND_SET_CARRY_FLAG; - /* Dump the result back, if non-BT. 
*/ - if (gregOfRM(modrm) != 4 /* BT */) { - uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); - SMC_IF_ALL(cb); - } - if (dis) - VG_(printf)("%s%c $0x%x, %s\n", - nameGrp8(gregOfRM(modrm)), nameISize(sz), src_val, - dis_buf); - } - return eip; - -# undef MODIFY_t2_AND_SET_CARRY_FLAG -} - - - -/* Generate ucode to multiply the value in EAX/AX/AL by the register - specified by the ereg of modrm, and park the result in - EDX:EAX/DX:AX/AX. */ -static void codegen_mul_A_D_Reg ( UCodeBlock* cb, Int sz, - UChar modrm, Bool signed_multiply ) -{ - Int helper = signed_multiply - ? - (sz==1 ? VGOFF_(helper_imul_8_16) - : (sz==2 ? VGOFF_(helper_imul_16_32) - : VGOFF_(helper_imul_32_64))) - : - (sz==1 ? VGOFF_(helper_mul_8_16) - : (sz==2 ? VGOFF_(helper_mul_16_32) - : VGOFF_(helper_mul_32_64))); - Int t1 = newTemp(cb); - Int ta = newTemp(cb); - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - uInstr1(cb, PUSH, sz, TempReg, t1); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr1(cb, PUSH, sz, TempReg, ta); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); - if (sz > 1) { - uInstr1(cb, POP, sz, TempReg, t1); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); - uInstr1(cb, POP, sz, TempReg, t1); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); - } else { - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr1(cb, POP, 2, TempReg, t1); - uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); - } - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul", - nameISize(sz), nameIReg(sz, eregOfRM(modrm))); - -} - - -/* Generate ucode to multiply the value in EAX/AX/AL by the value in - TempReg temp, and park the result in EDX:EAX/DX:AX/AX. */ -static void codegen_mul_A_D_Temp ( UCodeBlock* cb, Int sz, - Int temp, Bool signed_multiply, - UChar* dis_buf ) -{ - Int helper = signed_multiply - ? - (sz==1 ? VGOFF_(helper_imul_8_16) - : (sz==2 ? 
VGOFF_(helper_imul_16_32) - : VGOFF_(helper_imul_32_64))) - : - (sz==1 ? VGOFF_(helper_mul_8_16) - : (sz==2 ? VGOFF_(helper_mul_16_32) - : VGOFF_(helper_mul_32_64))); - Int t1 = newTemp(cb); - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, PUSH, sz, TempReg, temp); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); - uInstr1(cb, PUSH, sz, TempReg, t1); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); - if (sz > 1) { - uInstr1(cb, POP, sz, TempReg, t1); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); - uInstr1(cb, POP, sz, TempReg, t1); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); - } else { - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr1(cb, POP, 2, TempReg, t1); - uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); - } - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("%s%c %s\n", signed_multiply ? "imul" : "mul", - nameISize(sz), dis_buf); -} - - -/* Group 3 extended opcodes. */ -static -Addr dis_Grp3 ( UCodeBlock* cb, Int sz, Addr eip ) -{ - Int t1, t2; - UInt pair, d32; - UChar modrm; - UChar dis_buf[50]; - t1 = t2 = INVALID_TEMPREG; - modrm = getUChar(eip); - if (epartIsReg(modrm)) { - t1 = newTemp(cb); - switch (gregOfRM(modrm)) { - case 0: { /* TEST */ - Int tao = newTemp(cb); - eip++; d32 = getUDisp(sz, eip); eip += sz; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); - uLiteral(cb, d32); - uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1); - setFlagsFromUOpcode(cb, AND); - if (dis) - VG_(printf)("test%c $0x%x, %s\n", - nameISize(sz), d32, nameIReg(sz, eregOfRM(modrm))); - break; - } - case 2: /* NOT */ - eip++; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - uInstr1(cb, NOT, sz, TempReg, t1); - setFlagsFromUOpcode(cb, NOT); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - if (dis) - VG_(printf)("not%c %s\n", - nameISize(sz), nameIReg(sz, eregOfRM(modrm))); - break; - case 3: /* NEG */ - eip++; - uInstr2(cb, GET, sz, 
ArchReg, eregOfRM(modrm), TempReg, t1); - uInstr1(cb, NEG, sz, TempReg, t1); - setFlagsFromUOpcode(cb, NEG); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - if (dis) - VG_(printf)("neg%c %s\n", - nameISize(sz), nameIReg(sz, eregOfRM(modrm))); - break; - case 4: /* MUL */ - eip++; - codegen_mul_A_D_Reg ( cb, sz, modrm, False ); - break; - case 5: /* IMUL */ - eip++; - codegen_mul_A_D_Reg ( cb, sz, modrm, True ); - break; - case 6: /* DIV */ - eip++; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - codegen_div ( cb, sz, t1, False ); - if (dis) - VG_(printf)("div%c %s\n", nameISize(sz), - nameIReg(sz, eregOfRM(modrm))); - break; - case 7: /* IDIV */ - eip++; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - codegen_div ( cb, sz, t1, True ); - if (dis) - VG_(printf)("idiv%c %s\n", nameISize(sz), - nameIReg(sz, eregOfRM(modrm))); - break; - default: - VG_(printf)( - "unhandled Grp3(R) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp3"); - } - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - t2 = LOW24(pair); - t1 = newTemp(cb); - eip += HI8(pair); - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: { /* TEST */ - Int tao = newTemp(cb); - d32 = getUDisp(sz, eip); eip += sz; - uInstr2(cb, MOV, sz, Literal, 0, TempReg, tao); - uLiteral(cb, d32); - uInstr2(cb, AND, sz, TempReg, tao, TempReg, t1); - setFlagsFromUOpcode(cb, AND); - if (dis) - VG_(printf)("test%c $0x%x, %s\n", - nameISize(sz), d32, dis_buf); - break; - } - case 2: /* NOT */ - uInstr1(cb, NOT, sz, TempReg, t1); - setFlagsFromUOpcode(cb, NOT); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("not%c %s\n", nameISize(sz), dis_buf); - break; - case 3: /* NEG */ - uInstr1(cb, NEG, sz, TempReg, t1); - setFlagsFromUOpcode(cb, NEG); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("neg%c %s\n", nameISize(sz), dis_buf); - break; 
- case 4: /* MUL */ - codegen_mul_A_D_Temp ( cb, sz, t1, False, - dis?dis_buf:NULL ); - break; - case 5: /* IMUL */ - codegen_mul_A_D_Temp ( cb, sz, t1, True, dis?dis_buf:NULL ); - break; - case 6: /* DIV */ - codegen_div ( cb, sz, t1, False ); - if (dis) - VG_(printf)("div%c %s\n", nameISize(sz), dis_buf); - break; - case 7: /* IDIV */ - codegen_div ( cb, sz, t1, True ); - if (dis) - VG_(printf)("idiv%c %s\n", nameISize(sz), dis_buf); - break; - default: - VG_(printf)( - "unhandled Grp3(M) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp3"); - } - } - return eip; -} - - -/* Group 4 extended opcodes. */ -static -Addr dis_Grp4 ( UCodeBlock* cb, Addr eip ) -{ - Int t1, t2; - UInt pair; - UChar modrm; - UChar dis_buf[50]; - t1 = t2 = INVALID_TEMPREG; - - modrm = getUChar(eip); - if (epartIsReg(modrm)) { - t1 = newTemp(cb); - uInstr2(cb, GET, 1, ArchReg, eregOfRM(modrm), TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: /* INC */ - uInstr1(cb, INC, 1, TempReg, t1); - setFlagsFromUOpcode(cb, INC); - uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); - break; - case 1: /* DEC */ - uInstr1(cb, DEC, 1, TempReg, t1); - setFlagsFromUOpcode(cb, DEC); - uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); - break; - default: - VG_(printf)( - "unhandled Grp4(R) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp4"); - } - eip++; - if (dis) - VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)), - nameIReg(1, eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - t2 = LOW24(pair); - t1 = newTemp(cb); - uInstr2(cb, LOAD, 1, TempReg, t2, TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: /* INC */ - uInstr1(cb, INC, 1, TempReg, t1); - setFlagsFromUOpcode(cb, INC); - uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - break; - case 1: /* DEC */ - uInstr1(cb, DEC, 1, TempReg, t1); - setFlagsFromUOpcode(cb, DEC); - uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - break; - default: - VG_(printf)( - 
"unhandled Grp4(M) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp4"); - } - eip += HI8(pair); - if (dis) - VG_(printf)("%sb %s\n", nameGrp4(gregOfRM(modrm)), dis_buf); - } - return eip; -} - - -/* Group 5 extended opcodes. */ -static -Addr dis_Grp5 ( UCodeBlock* cb, Int sz, Addr eip, Bool* isEnd ) -{ - Int t1, t2, t3, t4; - UInt pair; - UChar modrm; - UChar dis_buf[50]; - t1 = t2 = t3 = t4 = INVALID_TEMPREG; - - modrm = getUChar(eip); - if (epartIsReg(modrm)) { - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: /* INC */ - uInstr1(cb, INC, sz, TempReg, t1); - setFlagsFromUOpcode(cb, INC); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - break; - case 1: /* DEC */ - uInstr1(cb, DEC, sz, TempReg, t1); - setFlagsFromUOpcode(cb, DEC); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - break; - case 2: /* call Ev */ - t3 = newTemp(cb); t4 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); - uLiteral(cb, 4); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); - uLiteral(cb, eip+1); - uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3); - SMC_IF_ALL(cb); - uInstr1(cb, JMP, 0, TempReg, t1); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpCall; - *isEnd = True; - break; - case 4: /* jmp Ev */ - uInstr1(cb, JMP, 0, TempReg, t1); - uCond(cb, CondAlways); - *isEnd = True; - break; - default: - VG_(printf)( - "unhandled Grp5(R) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp5"); - } - eip++; - if (dis) - VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)), - nameISize(sz), nameIReg(sz, eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - t2 = LOW24(pair); - t1 = newTemp(cb); - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - switch (gregOfRM(modrm)) { - case 0: /* INC */ - uInstr1(cb, INC, sz, TempReg, t1); - 
setFlagsFromUOpcode(cb, INC); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - break; - case 1: /* DEC */ - uInstr1(cb, DEC, sz, TempReg, t1); - setFlagsFromUOpcode(cb, DEC); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - break; - case 2: /* call Ev */ - t3 = newTemp(cb); t4 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); - uLiteral(cb, 4); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); - uLiteral(cb, eip+HI8(pair)); - uInstr2(cb, STORE, 4, TempReg, t4, TempReg, t3); - SMC_IF_ALL(cb); - uInstr1(cb, JMP, 0, TempReg, t1); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpCall; - *isEnd = True; - break; - case 4: /* JMP Ev */ - uInstr1(cb, JMP, 0, TempReg, t1); - uCond(cb, CondAlways); - *isEnd = True; - break; - case 6: /* PUSH Ev */ - t3 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t3); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t3); - SMC_IF_ALL(cb); - break; - default: - VG_(printf)( - "unhandled Grp5(M) case %d\n", (UInt)gregOfRM(modrm)); - VG_(panic)("Grp5"); - } - eip += HI8(pair); - if (dis) - VG_(printf)("%s%c %s\n", nameGrp5(gregOfRM(modrm)), - nameISize(sz), dis_buf); - } - return eip; -} - - -/* Template for REPE CMPS. Assumes this insn is the last one in - the basic block, and so emits a jump to the next insn. 
*/ -static -void codegen_REPE_CMPS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next ) -{ - Int tc, /* ECX */ - td, /* EDI */ ts, /* ESI */ - tdv, /* (EDI) */ tsv /* (ESI) */; - - tdv = newTemp(cb); - tsv = newTemp(cb); - td = newTemp(cb); - ts = newTemp(cb); - tc = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc); - uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0); - uLiteral(cb, eip_next); - uInstr1(cb, DEC, 4, TempReg, tc); - uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX); - - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); - - uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tdv); - uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tsv); - - uInstr2(cb, SUB, sz, TempReg, tdv, TempReg, tsv); - setFlagsFromUOpcode(cb, SUB); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tdv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tdv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tdv); - uInstr0(cb, CALLM_E, 0); - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, td); - uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, ts); - - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); - - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondZ); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip_next); - uCond(cb, CondAlways); -} - - -/* Template for REPNE SCAS. Assumes this insn is the last one in - the basic block, and so emits a jump to the next insn. 
*/ -static -void codegen_REPNE_SCAS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next ) -{ - Int ta /* EAX */, tc /* ECX */, td /* EDI */, tv; - ta = newTemp(cb); - tc = newTemp(cb); - tv = newTemp(cb); - td = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc); - uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0); - uLiteral(cb, eip_next); - uInstr1(cb, DEC, 4, TempReg, tc); - uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX); - - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv); - /* next uinstr kills ta, but that's ok -- don't need it again */ - uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta); - setFlagsFromUOpcode(cb, SUB); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tv); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondNZ); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip_next); - uCond(cb, CondAlways); -} - - -/* Template for REPE MOVS. Assumes this insn is the last one in - the basic block, and so emits a jump to the next insn. 
*/ -static -void codegen_REPE_MOVS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next ) -{ - Int ts /* ESI */, tc /* ECX */, td /* EDI */, tv; - tc = newTemp(cb); - td = newTemp(cb); - ts = newTemp(cb); - tv = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc); - uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0); - uLiteral(cb, eip_next); - uInstr1(cb, DEC, 4, TempReg, tc); - uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX); - - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); - - uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tv); - uInstr2(cb, STORE, sz, TempReg, tv, TempReg, td); - SMC_IF_SOME(cb); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tv); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, ts); - - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); - - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); -} - - -/* Template for REPE STOS. Assumes this insn is the last one in - the basic block, and so emits a jump to the next insn. 
*/ -static -void codegen_REPE_STOS ( UCodeBlock* cb, Int sz, Addr eip, Addr eip_next ) -{ - Int ta /* EAX */, tc /* ECX */, td /* EDI */; - ta = newTemp(cb); - tc = newTemp(cb); - td = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, tc); - uInstr2(cb, JIFZ, 4, TempReg, tc, Literal, 0); - uLiteral(cb, eip_next); - uInstr1(cb, DEC, 4, TempReg, tc); - uInstr2(cb, PUT, 4, TempReg, tc, ArchReg, R_ECX); - - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, STORE, sz, TempReg, ta, TempReg, td); - SMC_IF_SOME(cb); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, ta); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, ta); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, ta, TempReg, td); - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); -} - - -/* Template for CMPS, _not_ preceded by a REP prefix. 
*/ -static -void codegen_CMPS ( UCodeBlock* cb, Int sz ) -{ - Int td, /* EDI */ ts, /* ESI */ - tdv, /* (EDI) */ tsv /* (ESI) */; - tdv = newTemp(cb); - tsv = newTemp(cb); - td = newTemp(cb); - ts = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); - - uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tdv); - uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tsv); - - uInstr2(cb, SUB, sz, TempReg, tdv, TempReg, tsv); - setFlagsFromUOpcode(cb, SUB); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tdv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tdv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tdv); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tdv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, td); - uInstr2(cb, ADD, 4, TempReg, tdv, TempReg, ts); - - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); -} - - -/* Template for MOVS, _not_ preceded by a REP prefix. 
*/ -static -void codegen_MOVS ( UCodeBlock* cb, Int sz ) -{ - Int tv, /* the value being copied */ - td, /* EDI */ ts /* ESI */; - tv = newTemp(cb); - td = newTemp(cb); - ts = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); - - uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, tv); - uInstr2(cb, STORE, sz, TempReg, tv, TempReg, td); - SMC_IF_SOME(cb); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tv); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, ts); - - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); - uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); -} - - -/* Template for STOS, _not_ preceded by a REP prefix. */ -static -void codegen_STOS ( UCodeBlock* cb, Int sz ) -{ - Int ta /* EAX */, td /* EDI */; - ta = newTemp(cb); - td = newTemp(cb); - - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, STORE, sz, TempReg, ta, TempReg, td); - SMC_IF_SOME(cb); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, ta); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, ta); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, ta, TempReg, td); - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); -} - - -/* Template for LODS, _not_ preceded by a REP prefix. 
*/ -static -void codegen_LODS ( UCodeBlock* cb, Int sz ) -{ - Int ta /* EAX */, ts /* ESI */; - ta = newTemp(cb); - ts = newTemp(cb); - - uInstr2(cb, GET, 4, ArchReg, R_ESI, TempReg, ts); - uInstr2(cb, LOAD, sz, TempReg, ts, TempReg, ta); - uInstr2(cb, PUT, sz, TempReg, ta, ArchReg, R_EAX); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, ta); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, ta); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, ta); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, ta); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, ta, TempReg, ts); - uInstr2(cb, PUT, 4, TempReg, ts, ArchReg, R_ESI); -} - - -/* Template for REPNE SCAS, _not_ preceded by a REP prefix. */ -static -void codegen_SCAS ( UCodeBlock* cb, Int sz ) -{ - Int ta /* EAX */, td /* EDI */, tv; - ta = newTemp(cb); - tv = newTemp(cb); - td = newTemp(cb); - - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, ta); - uInstr2(cb, GET, 4, ArchReg, R_EDI, TempReg, td); - uInstr2(cb, LOAD, sz, TempReg, td, TempReg, tv); - /* next uinstr kills ta, but that's ok -- don't need it again */ - uInstr2(cb, SUB, sz, TempReg, tv, TempReg, ta); - setFlagsFromUOpcode(cb, SUB); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, tv); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, tv); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_get_dirflag)); - uFlagsRWU(cb, FlagD, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, tv); - uInstr0(cb, CALLM_E, 0); - - if (sz == 4 || sz == 2) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, tv); - uLiteral(cb, sz/2); - } - uInstr2(cb, ADD, 4, TempReg, tv, TempReg, td); - uInstr2(cb, PUT, 4, TempReg, td, ArchReg, R_EDI); -} - - -/* (I)MUL E, G. Supplied eip points to the modR/M byte. 
*/ -static -Addr dis_mul_E_G ( UCodeBlock* cb, - Int size, - Addr eip0, - Bool signed_multiply ) -{ - Int ta, tg, te, helper; - UChar dis_buf[50]; - UChar rm = getUChar(eip0); - ta = INVALID_TEMPREG; - te = newTemp(cb); - tg = newTemp(cb); - - switch (size) { - case 4: helper = signed_multiply ? VGOFF_(helper_imul_32_64) - : VGOFF_(helper_mul_32_64); - break; - case 2: helper = signed_multiply ? VGOFF_(helper_imul_16_32) - : VGOFF_(helper_mul_16_32); - break; - case 1: helper = signed_multiply ? VGOFF_(helper_imul_8_16) - : VGOFF_(helper_mul_8_16); - break; - default: VG_(panic)("dis_mul_E_G"); - } - - uInstr0(cb, CALLM_S, 0); - if (epartIsReg(rm)) { - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); - uInstr1(cb, PUSH, size, TempReg, te); - uInstr1(cb, PUSH, size, TempReg, tg); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr1(cb, POP, size, TempReg, tg); - uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("%smul%c %s, %s\n", - signed_multiply ? "i" : "", - nameISize(size), - nameIReg(size,eregOfRM(rm)), - nameIReg(size,gregOfRM(rm))); - return 1+eip0; - } else { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - ta = LOW24(pair); - uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tg); - uInstr1(cb, PUSH, size, TempReg, te); - uInstr1(cb, PUSH, size, TempReg, tg); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr1(cb, POP, size, TempReg, tg); - uInstr2(cb, PUT, size, TempReg, tg, ArchReg, gregOfRM(rm)); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("%smul%c %s, %s\n", - signed_multiply ? 
"i" : "", - nameISize(size), - dis_buf,nameIReg(size,gregOfRM(rm))); - return HI8(pair)+eip0; - } -} - - -/* IMUL I * E -> G. Supplied eip points to the modR/M byte. */ -static -Addr dis_imul_I_E_G ( UCodeBlock* cb, - Int size, - Addr eip, - Int litsize ) -{ - Int ta, te, tl, helper, d32; - UChar dis_buf[50]; - UChar rm = getUChar(eip); - ta = INVALID_TEMPREG; - te = newTemp(cb); - tl = newTemp(cb); - - switch (size) { - case 4: helper = VGOFF_(helper_imul_32_64); break; - case 2: helper = VGOFF_(helper_imul_16_32); break; - case 1: helper = VGOFF_(helper_imul_8_16); break; - default: VG_(panic)("dis_imul_I_E_G"); - } - - uInstr0(cb, CALLM_S, 0); - if (epartIsReg(rm)) { - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, te); - uInstr1(cb, PUSH, size, TempReg, te); - eip++; - } else { - UInt pair = disAMode ( cb, eip, dis?dis_buf:NULL); - ta = LOW24(pair); - uInstr2(cb, LOAD, size, TempReg, ta, TempReg, te); - uInstr1(cb, PUSH, size, TempReg, te); - eip += HI8(pair); - } - - d32 = getSDisp(litsize,eip); - eip += litsize; - - uInstr2(cb, MOV, size, Literal, 0, TempReg, tl); - uLiteral(cb, d32); - uInstr1(cb, PUSH, size, TempReg, tl); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOC, FlagsSZAP); - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr1(cb, POP, size, TempReg, te); - uInstr2(cb, PUT, size, TempReg, te, ArchReg, gregOfRM(rm)); - uInstr0(cb, CALLM_E, 0); - - if (dis) { - if (epartIsReg(rm)) { - VG_(printf)("imul %d, %s, %s\n", d32, nameIReg(size,eregOfRM(rm)), - nameIReg(size,gregOfRM(rm))); - } else { - VG_(printf)("imul %d, %s, %s\n", d32, dis_buf, - nameIReg(size,gregOfRM(rm))); - } - } - - return eip; -} - - -/* Handle FPU insns which read/write memory. On entry, eip points to - the second byte of the insn (the one following D8 .. DF). 
*/ -static -Addr dis_fpu_mem ( UCodeBlock* cb, Int size, Bool is_write, - Addr eip, UChar first_byte ) -{ - Int ta; - UInt pair; - UChar dis_buf[50]; - UChar second_byte = getUChar(eip); - vg_assert(second_byte < 0xC0); - second_byte &= 0x38; - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - ta = LOW24(pair); - eip += HI8(pair); - uInstr2(cb, is_write ? FPU_W : FPU_R, size, - Lit16, - (((UShort)first_byte) << 8) | ((UShort)second_byte), - TempReg, ta); - if (is_write) SMC_IF_ALL(cb); - if (dis) { - if (is_write) - VG_(printf)("fpu_w_%d 0x%x:0x%x, %s\n", - size, (UInt)first_byte, - (UInt)second_byte, dis_buf ); - else - VG_(printf)("fpu_r_%d %s, 0x%x:0x%x\n", - size, dis_buf, - (UInt)first_byte, - (UInt)second_byte ); - } - return eip; -} - - -/* Handle FPU insns which don't reference memory. On entry, eip points to - the second byte of the insn (the one following D8 .. DF). */ -static -Addr dis_fpu_no_mem ( UCodeBlock* cb, Addr eip, UChar first_byte ) -{ - Bool sets_ZCP = False; - Bool uses_ZCP = False; - UChar second_byte = getUChar(eip); eip++; - vg_assert(second_byte >= 0xC0); - - /* Does the insn write any integer condition codes (%EIP) ? */ - - if (first_byte == 0xDB && second_byte >= 0xF0 && second_byte <= 0xF7) { - /* FCOMI */ - sets_ZCP = True; - } else - if (first_byte == 0xDF && second_byte >= 0xF0 && second_byte <= 0xF7) { - /* FCOMIP */ - sets_ZCP = True; - } else - if (first_byte == 0xDB && second_byte >= 0xE8 && second_byte <= 0xEF) { - /* FUCOMI */ - sets_ZCP = True; - } else - if (first_byte == 0xDF && second_byte >= 0xE8 && second_byte <= 0xEF) { - /* FUCOMIP */ - sets_ZCP = True; - } - - /* Dually, does the insn read any integer condition codes (%EIP) ? 
*/ - - if (first_byte == 0xDA && second_byte >= 0xC0 && second_byte <= 0xDF) { - /* FCMOVB %st(n), %st(0) - FCMOVE %st(n), %st(0) - FCMOVBE %st(n), %st(0) - FCMOVU %st(n), %st(0) - */ - uses_ZCP = True; - } else - if (first_byte == 0xDB && second_byte >= 0xC0 && second_byte <= 0xDF) { - /* FCMOVNB %st(n), %st(0) - FCMOVNE %st(n), %st(0) - FCMOVNBE %st(n), %st(0) - FCMOVNU %st(n), %st(0) - */ - uses_ZCP = True; - } - - uInstr1(cb, FPU, 0, - Lit16, - (((UShort)first_byte) << 8) | ((UShort)second_byte) - ); - if (uses_ZCP) { - /* VG_(printf)("!!! --- FPU insn which reads %EFLAGS\n"); */ - uFlagsRWU(cb, FlagsZCP, FlagsEmpty, FlagsEmpty); - vg_assert(!sets_ZCP); - } - if (sets_ZCP) { - /* VG_(printf)("!!! --- FPU insn which writes %EFLAGS\n"); */ - uFlagsRWU(cb, FlagsEmpty, FlagsZCP, FlagsEmpty); - vg_assert(!uses_ZCP); - } - - if (dis) VG_(printf)("fpu 0x%x:0x%x%s%s\n", - (UInt)first_byte, (UInt)second_byte, - uses_ZCP ? " -rZCP" : "", - sets_ZCP ? " -wZCP" : "" ); - return eip; -} - - -/* Top-level handler for all FPU insns. On entry, eip points to the - second byte of the insn. */ -static -Addr dis_fpu ( UCodeBlock* cb, UChar first_byte, Addr eip ) -{ - const Bool rd = False; - const Bool wr = True; - UChar second_byte = getUChar(eip); - - /* Handle FSTSW %ax specially. */ - if (first_byte == 0xDF && second_byte == 0xE0) { - Int t1 = newTemp(cb); - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t1); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_fstsw_AX) ); - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); - uInstr1(cb, POP, 2, TempReg, t1); - uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("fstsw %%ax\n"); - eip++; - return eip; - } - - /* Handle all non-memory FPU ops simply. 
*/ - if (second_byte >= 0xC0) - return dis_fpu_no_mem ( cb, eip, first_byte ); - - /* The insn references memory; need to determine - whether it reads or writes, and at what size. */ - switch (first_byte) { - - case 0xD8: - switch ((second_byte >> 3) & 7) { - case 0: /* FADDs */ - case 1: /* FMULs */ - case 2: /* FCOMs */ - case 3: /* FCOMPs */ - case 4: /* FSUBs */ - case 5: /* FSUBRs */ - case 6: /* FDIVs */ - case 7: /* FDIVRs */ - return dis_fpu_mem(cb, 4, rd, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xD9: - switch ((second_byte >> 3) & 7) { - case 0: /* FLDs */ - return dis_fpu_mem(cb, 4, rd, eip, first_byte); - case 2: /* FSTs */ - case 3: /* FSTPs */ - return dis_fpu_mem(cb, 4, wr, eip, first_byte); - case 4: /* FLDENV */ - return dis_fpu_mem(cb, 28, rd, eip, first_byte); - case 5: /* FLDCW */ - return dis_fpu_mem(cb, 2, rd, eip, first_byte); - case 6: /* FNSTENV */ - return dis_fpu_mem(cb, 28, wr, eip, first_byte); - case 7: /* FSTCW */ - /* HACK! FSTCW actually writes 2 bytes, not 4. glibc - gets lots of moaning in __floor() if we do the right - thing here. */ - /* Later ... hack disabled .. we do do the Right Thing. 
*/ - return dis_fpu_mem(cb, /*4*/ 2, wr, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xDA: - switch ((second_byte >> 3) & 7) { - case 0: /* FIADD */ - case 1: /* FIMUL */ - case 2: /* FICOM */ - case 3: /* FICOMP */ - case 4: /* FISUB */ - case 5: /* FISUBR */ - case 6: /* FIDIV */ - case 7: /* FIDIVR */ - return dis_fpu_mem(cb, 4, rd, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xDB: - switch ((second_byte >> 3) & 7) { - case 0: /* FILD dword-integer */ - return dis_fpu_mem(cb, 4, rd, eip, first_byte); - case 2: /* FIST dword-integer */ - return dis_fpu_mem(cb, 4, wr, eip, first_byte); - case 3: /* FISTPl */ - return dis_fpu_mem(cb, 4, wr, eip, first_byte); - case 5: /* FLD extended-real */ - return dis_fpu_mem(cb, 10, rd, eip, first_byte); - case 7: /* FSTP extended-real */ - return dis_fpu_mem(cb, 10, wr, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xDC: - switch ((second_byte >> 3) & 7) { - case 0: /* FADD double-real */ - case 1: /* FMUL double-real */ - case 2: /* FCOM double-real */ - case 3: /* FCOMP double-real */ - case 4: /* FSUB double-real */ - case 5: /* FSUBR double-real */ - case 6: /* FDIV double-real */ - case 7: /* FDIVR double-real */ - return dis_fpu_mem(cb, 8, rd, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xDD: - switch ((second_byte >> 3) & 7) { - case 0: /* FLD double-real */ - return dis_fpu_mem(cb, 8, rd, eip, first_byte); - case 2: /* FST double-real */ - case 3: /* FSTP double-real */ - return dis_fpu_mem(cb, 8, wr, eip, first_byte); - default: - goto unhandled; - } - break; - - case 0xDF: - switch ((second_byte >> 3) & 7) { - case 0: /* FILD word-integer */ - return dis_fpu_mem(cb, 2, rd, eip, first_byte); - case 2: /* FIST word-integer */ - return dis_fpu_mem(cb, 2, wr, eip, first_byte); - case 3: /* FISTP word-integer */ - return dis_fpu_mem(cb, 2, wr, eip, first_byte); - case 5: /* FILD qword-integer */ - return dis_fpu_mem(cb, 8, 
rd, eip, first_byte); - case 7: /* FISTP qword-integer */ - return dis_fpu_mem(cb, 8, wr, eip, first_byte); - default: - goto unhandled; - } - break; - - default: goto unhandled; - } - - unhandled: - VG_(printf)("dis_fpu: unhandled memory case 0x%2x:0x%2x(%d)\n", - (UInt)first_byte, (UInt)second_byte, - (UInt)((second_byte >> 3) & 7) ); - VG_(panic)("dis_fpu: unhandled opcodes"); -} - - -/* Double length left shifts. Apparently only required in v-size (no - b- variant). */ -static -Addr dis_SHLRD_Gv_Ev ( UCodeBlock* cb, Addr eip, UChar modrm, - Int sz, - Tag amt_tag, UInt amt_val, - Bool left_shift ) -{ - /* amt_tag and amt_val denote either ArchReg(%CL) or a Literal. - And eip on entry points at the modrm byte. */ - Int t, t1, t2, ta, helper; - UInt pair; - UChar dis_buf[50]; - - vg_assert(sz == 2 || sz == 4); - - helper = left_shift - ? (sz==4 ? VGOFF_(helper_shldl) - : VGOFF_(helper_shldw)) - : (sz==4 ? VGOFF_(helper_shrdl) - : VGOFF_(helper_shrdw)); - - /* Get the amount to be shifted by onto the stack. */ - t = newTemp(cb); - t1 = newTemp(cb); - t2 = newTemp(cb); - if (amt_tag == ArchReg) { - vg_assert(amt_val == R_CL); - uInstr2(cb, GET, 1, ArchReg, amt_val, TempReg, t); - } else { - uInstr2(cb, MOV, 1, Literal, 0, TempReg, t); - uLiteral(cb, amt_val); - } - - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, PUSH, 1, TempReg, t); - - /* The E-part is the destination; this is shifted. The G-part - supplies bits to be shifted into the E-part, but is not - changed. 
*/ - - uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t1); - uInstr1(cb, PUSH, sz, TempReg, t1); - - if (epartIsReg(modrm)) { - eip++; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t2); - uInstr1(cb, PUSH, sz, TempReg, t2); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); - uInstr1(cb, POP, sz, TempReg, t); - uInstr2(cb, PUT, sz, TempReg, t, ArchReg, eregOfRM(modrm)); - if (dis) - VG_(printf)("shld%c %%cl, %s, %s\n", - nameISize(sz), nameIReg(sz, gregOfRM(modrm)), - nameIReg(sz, eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - ta = LOW24(pair); - eip += HI8(pair); - uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t2); - uInstr1(cb, PUSH, sz, TempReg, t2); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagsOSZACP, FlagsEmpty); - uInstr1(cb, POP, sz, TempReg, t); - uInstr2(cb, STORE, sz, TempReg, t, TempReg, ta); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("shld%c %%cl, %s, %s\n", - nameISize(sz), nameIReg(sz, gregOfRM(modrm)), - dis_buf); - } - - if (amt_tag == Literal) eip++; - uInstr1(cb, CLEAR, 0, Lit16, 8); - - uInstr0(cb, CALLM_E, 0); - return eip; -} - - -/* Handle BT/BTS/BTR/BTC Gv, Ev. Apparently b-size is not - required. */ - -typedef enum { BtOpNone, BtOpSet, BtOpReset, BtOpComp } BtOp; - -static Char* nameBtOp ( BtOp op ) -{ - switch (op) { - case BtOpNone: return ""; - case BtOpSet: return "s"; - case BtOpReset: return "r"; - case BtOpComp: return "c"; - default: VG_(panic)("nameBtOp"); - } -} - - -static -Addr dis_bt_G_E ( UCodeBlock* cb, Int sz, Addr eip, BtOp op ) -{ - UInt pair; - UChar dis_buf[50]; - UChar modrm; - - Int t_addr, t_bitno, t_mask, t_fetched, t_esp, temp, lit; - - /* 2 and 4 are actually possible. */ - vg_assert(sz == 2 || sz == 4); - /* We only handle 4. 
*/ - vg_assert(sz == 4); - - t_addr = t_bitno = t_mask - = t_fetched = t_esp = temp = INVALID_TEMPREG; - - t_fetched = newTemp(cb); - t_bitno = newTemp(cb); - temp = newTemp(cb); - lit = newTemp(cb); - - modrm = getUChar(eip); - - uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t_bitno); - - if (epartIsReg(modrm)) { - eip++; - /* Get it onto the client's stack. */ - t_esp = newTemp(cb); - t_addr = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t_esp); - uInstr2(cb, SUB, sz, Literal, 0, TempReg, t_esp); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t_esp, ArchReg, R_ESP); - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, temp); - uInstr2(cb, STORE, sz, TempReg, temp, TempReg, t_esp); - /* Make ta point at it. */ - uInstr2(cb, MOV, 4, TempReg, t_esp, TempReg, t_addr); - /* Mask out upper bits of the shift amount, since we're doing a - reg. */ - uInstr2(cb, MOV, 4, Literal, 0, TempReg, lit); - uLiteral(cb, sz == 4 ? 31 : 15); - uInstr2(cb, AND, 4, TempReg, lit, TempReg, t_bitno); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - t_addr = LOW24(pair); - eip += HI8(pair); - } - - /* At this point: ta points to the address being operated on. If - it was a reg, we will have pushed it onto the client's stack. - t_bitno is the bit number, suitable masked in the case of a reg. */ - - /* Now the main sequence. 
*/ - - uInstr2(cb, MOV, 4, TempReg, t_bitno, TempReg, temp); - uInstr2(cb, SAR, 4, Literal, 0, TempReg, temp); - uLiteral(cb, 3); - uInstr2(cb, ADD, 4, TempReg, temp, TempReg, t_addr); - /* ta now holds effective address */ - - uInstr2(cb, MOV, 4, Literal, 0, TempReg, lit); - uLiteral(cb, 7); - uInstr2(cb, AND, 4, TempReg, lit, TempReg, t_bitno); - /* bitno contains offset of bit within byte */ - - if (op != BtOpNone) { - t_mask = newTemp(cb); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_mask); - uLiteral(cb, 1); - uInstr2(cb, SHL, 4, TempReg, t_bitno, TempReg, t_mask); - } - /* mask is now a suitable byte mask */ - - uInstr2(cb, LOAD, 1, TempReg, t_addr, TempReg, t_fetched); - if (op != BtOpNone) { - uInstr2(cb, MOV, 4, TempReg, t_fetched, TempReg, temp); - switch (op) { - case BtOpSet: - uInstr2(cb, OR, 4, TempReg, t_mask, TempReg, temp); - break; - case BtOpComp: - uInstr2(cb, XOR, 4, TempReg, t_mask, TempReg, temp); - break; - case BtOpReset: - uInstr1(cb, NOT, 4, TempReg, t_mask); - uInstr2(cb, AND, 4, TempReg, t_mask, TempReg, temp); - break; - default: - VG_(panic)("dis_bt_G_E"); - } - uInstr2(cb, STORE, 1, TempReg, temp, TempReg, t_addr); - } - - /* Side effect done; now get selected bit into Carry flag */ - - uInstr2(cb, SHR, 4, TempReg, t_bitno, TempReg, t_fetched); - /* at bit 0 of fetched */ - - uInstr2(cb, MOV, 4, Literal, 0, TempReg, lit); - uLiteral(cb, 1); - uInstr2(cb, AND, 4, TempReg, lit, TempReg, t_fetched); - /* fetched is now 1 or 0 */ - - /* NEG is a handy way to convert zero/nonzero into the carry - flag. */ - uInstr1(cb, NEG, 4, TempReg, t_fetched); - setFlagsFromUOpcode(cb, NEG); - /* fetched is now in carry flag */ - - /* Move reg operand from stack back to reg */ - if (epartIsReg(modrm)) { - /* t_esp still points at it. 
*/ - uInstr2(cb, LOAD, sz, TempReg, t_esp, TempReg, temp); - uInstr2(cb, PUT, sz, TempReg, temp, ArchReg, eregOfRM(modrm)); - uInstr2(cb, ADD, sz, Literal, 0, TempReg, t_esp); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t_esp, ArchReg, R_ESP); - } - - if (epartIsReg(modrm)) { - if (dis) - VG_(printf)("bt%s%c %s, %s\n", - nameBtOp(op), - nameISize(sz), nameIReg(sz, gregOfRM(modrm)), - nameIReg(sz, eregOfRM(modrm))); - } else { - if (dis) - VG_(printf)("bt%s%c %s, %s\n", - nameBtOp(op), - nameISize(sz), nameIReg(sz, gregOfRM(modrm)), - dis_buf); - } - - return eip; -} - - - - -/* Handle BSF/BSR. Only v-size seems necessary. */ -static -Addr dis_bs_E_G ( UCodeBlock* cb, Int sz, Addr eip, Bool fwds ) -{ - Int t, t1, ta, helper; - UInt pair; - UChar dis_buf[50]; - UChar modrm; - - vg_assert(sz == 2 || sz == 4); - vg_assert(sz==4); - - helper = fwds ? VGOFF_(helper_bsf) : VGOFF_(helper_bsr); - modrm = getUChar(eip); - t1 = newTemp(cb); - t = newTemp(cb); - - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t1); - uInstr1(cb, PUSH, sz, TempReg, t1); - - if (epartIsReg(modrm)) { - eip++; - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t); - if (dis) - VG_(printf)("bs%c%c %s, %s\n", - fwds ? 'f' : 'r', - nameISize(sz), nameIReg(sz, eregOfRM(modrm)), - nameIReg(sz, gregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - ta = LOW24(pair); - eip += HI8(pair); - uInstr2(cb, LOAD, sz, TempReg, ta, TempReg, t); - if (dis) - VG_(printf)("bs%c%c %s, %s\n", - fwds ? 
'f' : 'r', - nameISize(sz), dis_buf, - nameIReg(sz, gregOfRM(modrm))); - } - - uInstr1(cb, PUSH, sz, TempReg, t); - uInstr1(cb, CALLM, 0, Lit16, helper); - uFlagsRWU(cb, FlagsEmpty, FlagZ, FlagsOSACP); - uInstr1(cb, POP, sz, TempReg, t); - uInstr1(cb, POP, sz, TempReg, t); - uInstr2(cb, PUT, sz, TempReg, t, ArchReg, gregOfRM(modrm)); - uInstr0(cb, CALLM_E, 0); - - return eip; -} - - -static -void codegen_xchg_eAX_Reg ( UCodeBlock* cb, Int sz, Int reg ) -{ - Int t1, t2; - vg_assert(sz == 2 || sz == 4); - t1 = newTemp(cb); - t2 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); - uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t2); - uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, R_EAX); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); - if (dis) - VG_(printf)("xchg%c %s, %s\n", nameISize(sz), - nameIReg(sz, R_EAX), nameIReg(sz, reg)); -} - - -static -void codegen_SAHF ( UCodeBlock* cb ) -{ - Int t = newTemp(cb); - Int t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t); - - /* Mask out parts of t not corresponding to %AH. This stops the - instrumenter complaining if they are undefined. Otherwise, the - instrumenter would check all 32 bits of t at the PUSH, which - could be the cause of incorrect warnings. Discovered by Daniel - Veillard . - */ - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, 0x0000FF00); - uInstr2(cb, AND, 4, TempReg, t2, TempReg, t); - /* We deliberately don't set the condition codes here, since this - AND is purely internal to Valgrind and nothing to do with the - client's state. 
*/ - - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, PUSH, 4, TempReg, t); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_SAHF)); - uFlagsRWU(cb, FlagsEmpty, FlagsSZACP, FlagsEmpty); - uInstr1(cb, CLEAR, 0, Lit16, 4); - uInstr0(cb, CALLM_E, 0); -} - - -static -Addr dis_cmpxchg_G_E ( UCodeBlock* cb, - Int size, - Addr eip0 ) -{ - Int ta, junk, dest, src, acc; - UChar dis_buf[50]; - UChar rm; - - rm = getUChar(eip0); - acc = newTemp(cb); - src = newTemp(cb); - dest = newTemp(cb); - junk = newTemp(cb); - /* Only needed to get gcc's dataflow analyser off my back. */ - ta = INVALID_TEMPREG; - - if (epartIsReg(rm)) { - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, dest); - eip0++; - if (dis) VG_(printf)("cmpxchg%c %s,%s\n", - nameISize(size), - nameIReg(size,gregOfRM(rm)), - nameIReg(size,eregOfRM(rm)) ); - nameIReg(size,eregOfRM(rm)); - } else { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL ); - ta = LOW24(pair); - uInstr2(cb, LOAD, size, TempReg, ta, TempReg, dest); - eip0 += HI8(pair); - if (dis) VG_(printf)("cmpxchg%c %s,%s\n", nameISize(size), - nameIReg(size,gregOfRM(rm)), dis_buf); - } - - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, src); - uInstr2(cb, GET, size, ArchReg, R_EAX, TempReg, acc); - uInstr2(cb, MOV, size, TempReg, acc, TempReg, junk); - uInstr2(cb, SUB, size, TempReg, dest, TempReg, junk); - setFlagsFromUOpcode(cb, SUB); - - uInstr2(cb, CMOV, 4, TempReg, src, TempReg, dest); - uCond(cb, CondZ); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr2(cb, CMOV, 4, TempReg, dest, TempReg, acc); - uCond(cb, CondNZ); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - - uInstr2(cb, PUT, size, TempReg, acc, ArchReg, R_EAX); - if (epartIsReg(rm)) { - uInstr2(cb, PUT, size, TempReg, dest, ArchReg, eregOfRM(rm)); - } else { - uInstr2(cb, STORE, size, TempReg, dest, TempReg, ta); - } - - return eip0; -} - - -/* Handle conditional move instructions of the form - cmovcc E(reg-or-mem), G(reg) - - E(src) is reg-or-mem - G(dst) is reg. 
- - If E is reg, --> GET %E, tmps - GET %G, tmpd - CMOVcc tmps, tmpd - PUT tmpd, %G - - If E is mem --> (getAddr E) -> tmpa - LD (tmpa), tmps - GET %G, tmpd - CMOVcc tmps, tmpd - PUT tmpd, %G -*/ -static -Addr dis_cmov_E_G ( UCodeBlock* cb, - Int size, - Condcode cond, - Addr eip0 ) -{ - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - Int tmps = newTemp(cb); - Int tmpd = newTemp(cb); - - if (epartIsReg(rm)) { - uInstr2(cb, GET, size, ArchReg, eregOfRM(rm), TempReg, tmps); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); - uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); - uCond(cb, cond); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("cmov%c%s %s,%s\n", - nameISize(size), - VG_(nameCondcode)(cond), - nameIReg(size,eregOfRM(rm)), - nameIReg(size,gregOfRM(rm))); - return 1+eip0; - } - - /* E refers to memory */ - { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - uInstr2(cb, LOAD, size, TempReg, tmpa, TempReg, tmps); - uInstr2(cb, GET, size, ArchReg, gregOfRM(rm), TempReg, tmpd); - uInstr2(cb, CMOV, 4, TempReg, tmps, TempReg, tmpd); - uCond(cb, cond); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr2(cb, PUT, size, TempReg, tmpd, ArchReg, gregOfRM(rm)); - if (dis) VG_(printf)("cmov%c%s %s,%s\n", - nameISize(size), - VG_(nameCondcode)(cond), - dis_buf, - nameIReg(size,gregOfRM(rm))); - return HI8(pair)+eip0; - } -} - - -static -Addr dis_xadd_G_E ( UCodeBlock* cb, - Int sz, - Addr eip0 ) -{ - UChar rm = getUChar(eip0); - UChar dis_buf[50]; - - Int tmpd = newTemp(cb); - Int tmpt = newTemp(cb); - - if (epartIsReg(rm)) { - uInstr2(cb, GET, sz, ArchReg, eregOfRM(rm), TempReg, tmpd); - uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); - uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); - setFlagsFromUOpcode(cb, ADD); - uInstr2(cb, PUT, sz, TempReg, tmpt, ArchReg, eregOfRM(rm)); - uInstr2(cb, PUT, sz, 
TempReg, tmpd, ArchReg, gregOfRM(rm)); - if (dis) - VG_(printf)("xadd%c %s, %s\n", nameISize(sz), - nameIReg(sz,gregOfRM(rm)), - nameIReg(sz,eregOfRM(rm))); - return 1+eip0; - } else { - UInt pair = disAMode ( cb, eip0, dis?dis_buf:NULL); - Int tmpa = LOW24(pair); - uInstr2(cb, LOAD, sz, TempReg, tmpa, TempReg, tmpd); - uInstr2(cb, GET, sz, ArchReg, gregOfRM(rm), TempReg, tmpt); - uInstr2(cb, ADD, sz, TempReg, tmpd, TempReg, tmpt); - setFlagsFromUOpcode(cb, ADD); - uInstr2(cb, STORE, sz, TempReg, tmpt, TempReg, tmpa); - SMC_IF_SOME(cb); - uInstr2(cb, PUT, sz, TempReg, tmpd, ArchReg, gregOfRM(rm)); - if (dis) - VG_(printf)("xadd%c %s, %s\n", nameISize(sz), - nameIReg(sz,gregOfRM(rm)), - dis_buf); - return HI8(pair)+eip0; - } -} - - -/*------------------------------------------------------------*/ -/*--- Disassembling entire basic blocks ---*/ -/*------------------------------------------------------------*/ - -/* Disassemble a single instruction into ucode, returning the update - eip, and setting *isEnd to True if this is the last insn in a basic - block. Also do debug printing if (dis). */ - -static Addr disInstr ( UCodeBlock* cb, Addr eip, Bool* isEnd ) -{ - UChar opc, modrm, abyte; - UInt d32, pair; - Int t1, t2, t3, t4; - UChar dis_buf[50]; - Int am_sz, d_sz; - - Int sz = 4; - Int first_uinstr = cb->used; - *isEnd = False; - t1 = t2 = t3 = t4 = INVALID_TEMPREG; - - if (dis) VG_(printf)("\t0x%x: ", eip); - - /* Spot the client-request magic sequence. 
*/ - { - UChar* myeip = (UChar*)eip; - /* Spot this: - C1C01D roll $29, %eax - C1C003 roll $3, %eax - C1C81B rorl $27, %eax - C1C805 rorl $5, %eax - C1C00D roll $13, %eax - C1C013 roll $19, %eax - */ - if (myeip[ 0] == 0xC1 && myeip[ 1] == 0xC0 && myeip[ 2] == 0x1D && - myeip[ 3] == 0xC1 && myeip[ 4] == 0xC0 && myeip[ 5] == 0x03 && - myeip[ 6] == 0xC1 && myeip[ 7] == 0xC8 && myeip[ 8] == 0x1B && - myeip[ 9] == 0xC1 && myeip[10] == 0xC8 && myeip[11] == 0x05 && - myeip[12] == 0xC1 && myeip[13] == 0xC0 && myeip[14] == 0x0D && - myeip[15] == 0xC1 && myeip[16] == 0xC0 && myeip[17] == 0x13 - ) { - eip += 18; - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpClientReq; - *isEnd = True; - if (dis) - VG_(printf)("%%edx = client_request ( %%eax )\n"); - return eip; - } - } - - /* Skip a LOCK prefix. */ - if (getUChar(eip) == 0xF0) { - /* VG_(printf)("LOCK LOCK LOCK LOCK LOCK \n"); */ - eip++; - } - - /* Crap out if we see a segment override prefix. */ - if (getUChar(eip) == 0x65) { - VG_(message)(Vg_DebugMsg, ""); - VG_(message)(Vg_DebugMsg, "Possible workaround for the following abort: do not use special"); - VG_(message)(Vg_DebugMsg, "PII/PIII-specific pthreads library (possibly in /lib/i686/*.so)."); - VG_(message)(Vg_DebugMsg, "You might be able to kludge around this by renaming /lib/i686 to"); - VG_(message)(Vg_DebugMsg, "/lib/i686-HIDDEN. On RedHat 7.2 this causes ld.so to fall back"); - VG_(message)(Vg_DebugMsg, "to using the less specialised versions in /lib instead, which"); - VG_(message)(Vg_DebugMsg, "valgrind might be able to better deal with."); - VG_(message)(Vg_DebugMsg, ""); - VG_(message)(Vg_DebugMsg, "WARNING. WARNING. WARNING. WARNING. WARNING. WARNING. WARNING."); - VG_(message)(Vg_DebugMsg, "WARNING: The suggested kludge may also render your system unbootable"); - VG_(message)(Vg_DebugMsg, "WARNING: or otherwise totally screw it up. 
Only try this if you"); - VG_(message)(Vg_DebugMsg, "WARNING: know what you are doing, and are prepared to take risks."); - VG_(message)(Vg_DebugMsg, "YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED. YOU HAVE BEEN WARNED."); - VG_(message)(Vg_DebugMsg, ""); - VG_(message)(Vg_DebugMsg, "Another consideration is that this may well mean your application"); - VG_(message)(Vg_DebugMsg, "uses threads, which valgrind doesn't currently support, so even if"); - VG_(message)(Vg_DebugMsg, "you work around this problem, valgrind may abort later if it sees"); - VG_(message)(Vg_DebugMsg, "a clone() system call."); - VG_(unimplemented)("x86 segment override (SEG=GS) prefix; see above for details"); - } - - /* Detect operand-size overrides. */ - if (getUChar(eip) == 0x66) { sz = 2; eip++; }; - - opc = getUChar(eip); eip++; - - switch (opc) { - - /* ------------------------ Control flow --------------- */ - - case 0xC2: /* RET imm16 */ - d32 = getUDisp16(eip); eip += 2; - goto do_Ret; - case 0xC3: /* RET */ - d32 = 0; - goto do_Ret; - do_Ret: - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); - uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 4+d32); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - uInstr1(cb, JMP, 0, TempReg, t2); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpRet; - - *isEnd = True; - if (dis) { - if (d32 == 0) VG_(printf)("ret\n"); - else VG_(printf)("ret %d\n", d32); - } - break; - - case 0xE8: /* CALL J4 */ - d32 = getUDisp32(eip); eip += 4; - d32 += eip; /* eip now holds return-to addr, d32 is call-to addr */ - if (d32 == eip && getUChar(eip) >= 0x58 - && getUChar(eip) <= 0x5F) { - /* Specially treat the position-independent-code idiom - call X - X: popl %reg - as - movl %eip, %reg. - since this generates better code, but for no other reason. 
*/ - Int archReg = getUChar(eip) - 0x58; - /* VG_(printf)("-- fPIC thingy\n"); */ - t1 = newTemp(cb); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); - uLiteral(cb, eip); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, archReg); - eip++; /* Step over the POP */ - if (dis) - VG_(printf)("call 0x%x ; popl %s\n",d32,nameIReg(4,archReg)); - } else { - /* The normal sequence for a call. */ - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t1); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 4); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, eip); - uInstr2(cb, STORE, 4, TempReg, t2, TempReg, t1); - SMC_IF_ALL(cb); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpCall; - *isEnd = True; - if (dis) VG_(printf)("call 0x%x\n",d32); - } - break; - - case 0xC9: /* LEAVE */ - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_EBP, TempReg, t1); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t2); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBP); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 4); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - if (dis) VG_(printf)("leave"); - break; - - /* ---------------- Misc wierd-ass insns --------------- */ - - case 0x27: /* DAA */ - case 0x2F: /* DAS */ - t1 = newTemp(cb); - uInstr2(cb, GET, 1, ArchReg, R_AL, TempReg, t1); - /* Widen %AL to 32 bits, so it's all defined when we push it. */ - uInstr1(cb, WIDEN, 4, TempReg, t1); - LAST_UINSTR(cb).extra4b = 1; - LAST_UINSTR(cb).signed_widen = False; - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, PUSH, 4, TempReg, t1); - uInstr1(cb, CALLM, 0, Lit16, - opc == 0x27 ? 
VGOFF_(helper_DAA) : VGOFF_(helper_DAS) ); - uFlagsRWU(cb, FlagsAC, FlagsOSZACP, FlagsEmpty); - uInstr1(cb, POP, 4, TempReg, t1); - uInstr0(cb, CALLM_E, 0); - uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, R_AL); - if (dis) VG_(printf)(opc == 0x27 ? "daa\n" : "das\n"); - break; - - /* ------------------------ CWD/CDQ -------------------- */ - - case 0x98: /* CBW */ - t1 = newTemp(cb); - if (sz == 4) { - uInstr2(cb, GET, 2, ArchReg, R_EAX, TempReg, t1); - uInstr1(cb, WIDEN, 4, TempReg, t1); /* 4 == dst size */ - LAST_UINSTR(cb).extra4b = 2; /* the source size */ - LAST_UINSTR(cb).signed_widen = True; - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); - if (dis) VG_(printf)("cwd\n"); - } else { - vg_assert(sz == 2); - uInstr2(cb, GET, 1, ArchReg, R_EAX, TempReg, t1); - uInstr1(cb, WIDEN, 2, TempReg, t1); /* 2 == dst size */ - LAST_UINSTR(cb).extra4b = 1; /* the source size */ - LAST_UINSTR(cb).signed_widen = True; - uInstr2(cb, PUT, 2, TempReg, t1, ArchReg, R_EAX); - if (dis) VG_(printf)("cbw\n"); - } - break; - - case 0x99: /* CWD/CDQ */ - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); - uInstr2(cb, SAR, sz, Literal, 0, TempReg, t1); - uLiteral(cb, sz == 2 ? 15 : 31); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EDX); - if (dis) VG_(printf)(sz == 2 ? "cwdq\n" : "cdqq\n"); - break; - - /* ------------------------ FPU ops -------------------- */ - - case 0x9E: /* SAHF */ - codegen_SAHF ( cb ); - if (dis) VG_(printf)("sahf\n"); - break; - - case 0x9B: /* FWAIT */ - /* ignore? 
*/ - if (dis) VG_(printf)("fwait\n"); - break; - - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - eip = dis_fpu ( cb, opc, eip ); - break; - - /* ------------------------ INC & DEC ------------------ */ - - case 0x40: /* INC eAX */ - case 0x41: /* INC eCX */ - case 0x42: /* INC eDX */ - case 0x43: /* INC eBX */ - case 0x45: /* INC eBP */ - case 0x46: /* INC eSI */ - case 0x47: /* INC eDI */ - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x40), - TempReg, t1); - uInstr1(cb, INC, sz, TempReg, t1); - setFlagsFromUOpcode(cb, INC); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, - (UInt)(opc - 0x40)); - if (dis) - VG_(printf)("inc%c %s\n", nameISize(sz), nameIReg(sz,opc-0x40)); - break; - - case 0x48: /* DEC eAX */ - case 0x49: /* DEC eCX */ - case 0x4A: /* DEC eDX */ - case 0x4B: /* DEC eBX */ - case 0x4D: /* DEC eBP */ - case 0x4E: /* DEC eSI */ - case 0x4F: /* DEC eDI */ - t1 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, (UInt)(opc - 0x48), - TempReg, t1); - uInstr1(cb, DEC, sz, TempReg, t1); - setFlagsFromUOpcode(cb, DEC); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, - (UInt)(opc - 0x48)); - if (dis) - VG_(printf)("dec%c %s\n", nameISize(sz), nameIReg(sz,opc-0x48)); - break; - - /* ------------------------ INT ------------------------ */ - - case 0xCD: /* INT imm8 */ - d32 = getUChar(eip); eip++; - if (d32 != 0x80) VG_(panic)("disInstr: INT but not 0x80 !"); - /* It's important that all ArchRegs carry their up-to-date value - at this point. So we declare an end-of-block here, which - forces any TempRegs caching ArchRegs to be flushed. 
*/ - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - LAST_UINSTR(cb).jmpkind = JmpSyscall; - *isEnd = True; - if (dis) VG_(printf)("int $0x80\n"); - break; - - /* ------------------------ Jcond, byte offset --------- */ - - case 0xEB: /* Jb (jump, byte offset) */ - d32 = (eip+1) + getSDisp8(eip); eip++; - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, CondAlways); - *isEnd = True; - if (dis) - VG_(printf)("jmp-8 0x%x\n", d32); - break; - - case 0xE9: /* Jv (jump, 16/32 offset) */ - d32 = (eip+sz) + getSDisp(sz,eip); eip += sz; - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, CondAlways); - *isEnd = True; - if (dis) - VG_(printf)("jmp 0x%x\n", d32); - break; - - case 0x70: - case 0x71: - case 0x72: /* JBb/JNAEb (jump below) */ - case 0x73: /* JNBb/JAEb (jump not below) */ - case 0x74: /* JZb/JEb (jump zero) */ - case 0x75: /* JNZb/JNEb (jump not zero) */ - case 0x76: /* JBEb/JNAb (jump below or equal) */ - case 0x77: /* JNBEb/JAb (jump not below or equal) */ - case 0x78: /* JSb (jump negative) */ - case 0x79: /* JSb (jump not negative) */ - case 0x7A: /* JP (jump parity even) */ - case 0x7B: /* JNP/JPO (jump parity odd) */ - case 0x7C: /* JLb/JNGEb (jump less) */ - case 0x7D: /* JGEb/JNLb (jump greater or equal) */ - case 0x7E: /* JLEb/JNGb (jump less or equal) */ - case 0x7F: /* JGb/JNLEb (jump greater) */ - d32 = (eip+1) + getSDisp8(eip); eip++; - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, (Condcode)(opc - 0x70)); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - /* It's actually acceptable not to end this basic block at a - control transfer, reducing the number of jumps through - vg_dispatch, at the expense of possibly translating the insns - following this jump twice. This does give faster code, but - on the whole I don't think the effort is worth it. 
*/ - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - *isEnd = True; - /* The above 3 lines would be removed if the bb was not to end - here. */ - if (dis) - VG_(printf)("j%s-8 0x%x\n", VG_(nameCondcode)(opc - 0x70), d32); - break; - - case 0xE3: /* JECXZ or perhaps JCXZ, depending on OSO ? Intel - manual says it depends on address size override, - which doesn't sound right to me. */ - d32 = (eip+1) + getSDisp8(eip); eip++; - t1 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); - uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); - uLiteral(cb, d32); - if (dis) - VG_(printf)("j%sz 0x%x\n", nameIReg(sz, R_ECX), d32); - break; - - case 0xE2: /* LOOP disp8 */ - /* Again, the docs say this uses ECX/CX as a count depending on - the address size override, not the operand one. Since we - don't handle address size overrides, I guess that means - ECX. */ - d32 = (eip+1) + getSDisp8(eip); eip++; - t1 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ECX, TempReg, t1); - uInstr1(cb, DEC, 4, TempReg, t1); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ECX); - uInstr2(cb, JIFZ, 4, TempReg, t1, Literal, 0); - uLiteral(cb, eip); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, CondAlways); - *isEnd = True; - if (dis) - VG_(printf)("loop 0x%x\n", d32); - break; - - /* ------------------------ IMUL ----------------------- */ - - case 0x69: /* IMUL Iv, Ev, Gv */ - eip = dis_imul_I_E_G ( cb, sz, eip, sz ); - break; - case 0x6B: /* IMUL Ib, Ev, Gv */ - eip = dis_imul_I_E_G ( cb, sz, eip, 1 ); - break; - - /* ------------------------ MOV ------------------------ */ - - case 0x88: /* MOV Gb,Eb */ - eip = dis_mov_G_E(cb, 1, eip); - break; - - case 0x89: /* MOV Gv,Ev */ - eip = dis_mov_G_E(cb, sz, eip); - break; - - case 0x8A: /* MOV Eb,Gb */ - eip = dis_mov_E_G(cb, 1, eip); - break; - - case 0x8B: /* MOV Ev,Gv */ - eip = dis_mov_E_G(cb, sz, eip); - break; - - case 0x8D: /* LEA M,Gv */ - modrm = getUChar(eip); - if 
(epartIsReg(modrm)) - VG_(panic)("LEA M,Gv: modRM refers to register"); - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - eip += HI8(pair); - t1 = LOW24(pair); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); - if (dis) - VG_(printf)("lea%c %s, %s\n", nameISize(sz), dis_buf, - nameIReg(sz,gregOfRM(modrm))); - break; - - case 0xA0: /* MOV Ob,AL */ - sz = 1; - /* Fall through ... */ - case 0xA1: /* MOV Ov,eAX */ - d32 = getUDisp32(eip); eip += 4; - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, d32); - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, R_EAX); - if (dis) VG_(printf)("mov%c 0x%x,%s\n", nameISize(sz), - d32, nameIReg(sz,R_EAX)); - break; - - case 0xA2: /* MOV AL,Ob */ - sz = 1; - /* Fall through ... */ - case 0xA3: /* MOV eAX,Ov */ - d32 = getUDisp32(eip); eip += 4; - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, R_EAX, TempReg, t1); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, d32); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_SOME(cb); - if (dis) VG_(printf)("mov%c %s,0x%x\n", nameISize(sz), - nameIReg(sz,R_EAX), d32); - break; - - case 0xB0: /* MOV imm,AL */ - case 0xB1: /* MOV imm,CL */ - case 0xB2: /* MOV imm,DL */ - case 0xB3: /* MOV imm,BL */ - case 0xB4: /* MOV imm,AH */ - case 0xB5: /* MOV imm,CH */ - case 0xB6: /* MOV imm,DH */ - case 0xB7: /* MOV imm,BH */ - d32 = getUChar(eip); eip += 1; - t1 = newTemp(cb); - uInstr2(cb, MOV, 1, Literal, 0, TempReg, t1); - uLiteral(cb, d32); - uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, opc-0xB0); - if (dis) VG_(printf)("movb $0x%x,%s\n", d32, - nameIReg(1,opc-0xB0)); - break; - - case 0xB8: /* MOV imm,eAX */ - case 0xB9: /* MOV imm,eCX */ - case 0xBA: /* MOV imm,eDX */ - case 0xBB: /* MOV imm,eBX */ - case 0xBD: /* MOV imm,eBP */ - case 0xBE: /* MOV imm,eSI */ - case 0xBF: /* MOV imm,eDI */ - d32 = getUDisp(sz,eip); eip += sz; - t1 = 
newTemp(cb); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); - uLiteral(cb, d32); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0xB8); - if (dis) VG_(printf)("mov%c $0x%x,%s\n", nameISize(sz), d32, - nameIReg(sz,opc-0xB8)); - break; - - case 0xC6: /* MOV Ib,Eb */ - sz = 1; - goto do_Mov_I_E; - case 0xC7: /* MOV Iv,Ev */ - goto do_Mov_I_E; - - do_Mov_I_E: - modrm = getUChar(eip); - if (epartIsReg(modrm)) { - d32 = getUDisp(sz,eip); eip += sz; - t1 = newTemp(cb); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); - uLiteral(cb, d32); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, eregOfRM(modrm)); - if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, - nameIReg(sz,eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - eip += HI8(pair); - d32 = getUDisp(sz,eip); eip += sz; - t1 = newTemp(cb); - t2 = LOW24(pair); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t1); - uLiteral(cb, d32); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_SOME(cb); - if (dis) VG_(printf)("mov%c $0x%x, %s\n", nameISize(sz), d32, dis_buf); - } - break; - - /* ------------------------ opl imm, A ----------------- */ - - case 0x04: /* ADD Ib, AL */ - eip = dis_op_imm_A(cb, 1, ADD, True, eip, "add" ); - break; - case 0x05: /* ADD Iv, eAX */ - eip = dis_op_imm_A(cb, sz, ADD, True, eip, "add" ); - break; - - case 0x0C: /* OR Ib, AL */ - eip = dis_op_imm_A(cb, 1, OR, True, eip, "or" ); - break; - case 0x0D: /* OR Iv, eAX */ - eip = dis_op_imm_A(cb, sz, OR, True, eip, "or" ); - break; - - case 0x1C: /* SBB Ib, AL */ - eip = dis_op_imm_A(cb, 1, SBB, True, eip, "sbb" ); - break; - - case 0x24: /* AND Ib, AL */ - eip = dis_op_imm_A(cb, 1, AND, True, eip, "and" ); - break; - case 0x25: /* AND Iv, eAX */ - eip = dis_op_imm_A(cb, sz, AND, True, eip, "and" ); - break; - - case 0x2C: /* SUB Ib, AL */ - eip = dis_op_imm_A(cb, 1, SUB, True, eip, "sub" ); - break; - case 0x2D: /* SUB Iv, eAX */ - eip = dis_op_imm_A(cb, sz, SUB, True, eip, "sub" ); - break; - - case 
0x34: /* XOR Ib, AL */ - eip = dis_op_imm_A(cb, 1, XOR, True, eip, "xor" ); - break; - case 0x35: /* XOR Iv, eAX */ - eip = dis_op_imm_A(cb, sz, XOR, True, eip, "xor" ); - break; - - case 0x3C: /* CMP Ib, AL */ - eip = dis_op_imm_A(cb, 1, SUB, False, eip, "cmp" ); - break; - case 0x3D: /* CMP Iv, eAX */ - eip = dis_op_imm_A(cb, sz, SUB, False, eip, "cmp" ); - break; - - case 0xA8: /* TEST Ib, AL */ - eip = dis_op_imm_A(cb, 1, AND, False, eip, "test" ); - break; - case 0xA9: /* TEST Iv, eAX */ - eip = dis_op_imm_A(cb, sz, AND, False, eip, "test" ); - break; - - /* ------------------------ opl Ev, Gv ----------------- */ - - case 0x02: /* ADD Eb,Gb */ - eip = dis_op2_E_G ( cb, ADD, True, 1, eip, "add" ); - break; - case 0x03: /* ADD Ev,Gv */ - eip = dis_op2_E_G ( cb, ADD, True, sz, eip, "add" ); - break; - - case 0x0A: /* OR Eb,Gb */ - eip = dis_op2_E_G ( cb, OR, True, 1, eip, "or" ); - break; - case 0x0B: /* OR Ev,Gv */ - eip = dis_op2_E_G ( cb, OR, True, sz, eip, "or" ); - break; - - case 0x12: /* ADC Eb,Gb */ - eip = dis_op2_E_G ( cb, ADC, True, 1, eip, "adc" ); - break; - case 0x13: /* ADC Ev,Gv */ - eip = dis_op2_E_G ( cb, ADC, True, sz, eip, "adc" ); - break; - - case 0x1B: /* SBB Ev,Gv */ - eip = dis_op2_E_G ( cb, SBB, True, sz, eip, "sbb" ); - break; - - case 0x22: /* AND Eb,Gb */ - eip = dis_op2_E_G ( cb, AND, True, 1, eip, "and" ); - break; - case 0x23: /* AND Ev,Gv */ - eip = dis_op2_E_G ( cb, AND, True, sz, eip, "and" ); - break; - - case 0x2A: /* SUB Eb,Gb */ - eip = dis_op2_E_G ( cb, SUB, True, 1, eip, "sub" ); - break; - case 0x2B: /* SUB Ev,Gv */ - eip = dis_op2_E_G ( cb, SUB, True, sz, eip, "sub" ); - break; - - case 0x32: /* XOR Eb,Gb */ - eip = dis_op2_E_G ( cb, XOR, True, 1, eip, "xor" ); - break; - case 0x33: /* XOR Ev,Gv */ - eip = dis_op2_E_G ( cb, XOR, True, sz, eip, "xor" ); - break; - - case 0x3A: /* CMP Eb,Gb */ - eip = dis_op2_E_G ( cb, SUB, False, 1, eip, "cmp" ); - break; - case 0x3B: /* CMP Ev,Gv */ - eip = dis_op2_E_G ( cb, SUB, False, 
sz, eip, "cmp" ); - break; - - case 0x84: /* TEST Eb,Gb */ - eip = dis_op2_E_G ( cb, AND, False, 1, eip, "test" ); - break; - case 0x85: /* TEST Ev,Gv */ - eip = dis_op2_E_G ( cb, AND, False, sz, eip, "test" ); - break; - - /* ------------------------ opl Gv, Ev ----------------- */ - - case 0x00: /* ADD Gb,Eb */ - eip = dis_op2_G_E ( cb, ADD, True, 1, eip, "add" ); - break; - case 0x01: /* ADD Gv,Ev */ - eip = dis_op2_G_E ( cb, ADD, True, sz, eip, "add" ); - break; - - case 0x08: /* OR Gb,Eb */ - eip = dis_op2_G_E ( cb, OR, True, 1, eip, "or" ); - break; - case 0x09: /* OR Gv,Ev */ - eip = dis_op2_G_E ( cb, OR, True, sz, eip, "or" ); - break; - - case 0x11: /* ADC Gv,Ev */ - eip = dis_op2_G_E ( cb, ADC, True, sz, eip, "adc" ); - break; - - case 0x19: /* SBB Gv,Ev */ - eip = dis_op2_G_E ( cb, SBB, True, sz, eip, "sbb" ); - break; - - case 0x20: /* AND Gb,Eb */ - eip = dis_op2_G_E ( cb, AND, True, 1, eip, "and" ); - break; - case 0x21: /* AND Gv,Ev */ - eip = dis_op2_G_E ( cb, AND, True, sz, eip, "and" ); - break; - - case 0x28: /* SUB Gb,Eb */ - eip = dis_op2_G_E ( cb, SUB, True, 1, eip, "sub" ); - break; - case 0x29: /* SUB Gv,Ev */ - eip = dis_op2_G_E ( cb, SUB, True, sz, eip, "sub" ); - break; - - case 0x30: /* XOR Gb,Eb */ - eip = dis_op2_G_E ( cb, XOR, True, 1, eip, "xor" ); - break; - case 0x31: /* XOR Gv,Ev */ - eip = dis_op2_G_E ( cb, XOR, True, sz, eip, "xor" ); - break; - - case 0x38: /* CMP Gb,Eb */ - eip = dis_op2_G_E ( cb, SUB, False, 1, eip, "cmp" ); - break; - case 0x39: /* CMP Gv,Ev */ - eip = dis_op2_G_E ( cb, SUB, False, sz, eip, "cmp" ); - break; - - /* ------------------------ POP ------------------------ */ - - case 0x58: /* POP eAX */ - case 0x59: /* POP eCX */ - case 0x5A: /* POP eDX */ - case 0x5B: /* POP eBX */ - case 0x5D: /* POP eBP */ - case 0x5E: /* POP eSI */ - case 0x5F: /* POP eDI */ - { Int n_pops; - Addr eipS, eipE; - UChar ch; - if (sz != 4) goto normal_pop_case; - if (VG_(clo_cachesim)) goto normal_pop_case; - /* eip points at 
first pop insn + 1. Make eipS and eipE - bracket the sequence. */ - eipE = eipS = eip - 1; - while (True) { - ch = getUChar(eipE+1); - if (ch < 0x58 || ch > 0x5F || ch == 0x5C) break; - eipE++; - } - n_pops = eipE - eipS + 1; - if (0 && n_pops > 1) VG_(printf)("%d pops\n", n_pops); - t1 = newTemp(cb); t3 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); - for (; eipS <= eipE; eipS++) { - ch = getUChar(eipS); - uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t3); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, ch-0x58); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 4); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("popl %s\n", nameIReg(4,ch-0x58)); - } - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - eip = eipE + 1; - break; - } - - case 0x5C: /* POP eSP */ - normal_pop_case: - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, opc-0x58); - if (dis) - VG_(printf)("pop%c %s\n", nameISize(sz), nameIReg(sz,opc-0x58)); - break; - - case 0x9D: /* POPF */ - vg_assert(sz == 2 || sz == 4); - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); - uInstr1(cb, PUTF, sz, TempReg, t1); - /* PUTF writes all the flags we are interested in */ - uFlagsRWU(cb, FlagsEmpty, FlagsALL, FlagsEmpty); - if (dis) - VG_(printf)("popf%c\n", nameISize(sz)); - break; - - case 0x61: /* POPA */ - { Int reg; - /* Just to keep things sane, we assert for a size 4. It's - probably OK for size 2 as well, but I'd like to find a test - case; ie, have the assertion fail, before committing to it. 
- If it fails for you, uncomment the sz == 2 bit, try again, - and let me know whether or not it works. (jseward@acm.org). */ - vg_assert(sz == 4 /* || sz == 2 */); - - /* Eight values are popped, one per register, but the value of - %esp on the stack is ignored and instead incremented (in one - hit at the end) for each of the values. */ - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t2); - uInstr2(cb, MOV, 4, TempReg, t2, TempReg, t3); - - /* Do %edi, %esi, %ebp */ - for (reg = 7; reg >= 5; reg--) { - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); - } - /* Ignore (skip) value of %esp on stack. */ - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - /* Do %ebx, %edx, %ecx, %eax */ - for (reg = 3; reg >= 0; reg--) { - uInstr2(cb, LOAD, sz, TempReg, t2, TempReg, t1); - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, reg); - } - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t3); - uLiteral(cb, sz * 8); /* One 'sz' per register */ - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ESP); - if (dis) - VG_(printf)("popa%c\n", nameISize(sz)); - break; - } - - case 0x8F: /* POPL/POPW m32 */ - { UInt pair1; - Int tmpa; - UChar rm = getUChar(eip); - - /* make sure this instruction is correct POP */ - vg_assert(!epartIsReg(rm) && (gregOfRM(rm) == 0)); - /* and has correct size */ - vg_assert(sz == 4); - - t1 = newTemp(cb); t3 = newTemp(cb); - /* set t1 to ESP: t1 = ESP */ - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); - /* load M[ESP] to virtual register t3: t3 = M[t1] */ - uInstr2(cb, LOAD, 4, TempReg, t1, TempReg, t3); - /* resolve MODR/M */ - pair1 = disAMode ( cb, eip, dis?dis_buf:NULL); - - tmpa = LOW24(pair1); - /* uInstr2(cb, LOAD, sz, TempReg, tmpa, TempReg, tmpa); */ - /* store value from stack in memory, M[m32] = t3 */ 
- uInstr2(cb, STORE, 4, TempReg, t3, TempReg, tmpa); - - /* increase ESP */ - uInstr2(cb, ADD, 4, Literal, 0, TempReg, t1); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - - if (dis) - VG_(printf)("popl %s\n", dis_buf); - - eip += HI8(pair1); - break; - } - - /* ------------------------ PUSH ----------------------- */ - - case 0x50: /* PUSH eAX */ - case 0x51: /* PUSH eCX */ - case 0x52: /* PUSH eDX */ - case 0x53: /* PUSH eBX */ - case 0x55: /* PUSH eBP */ - case 0x56: /* PUSH eSI */ - case 0x57: /* PUSH eDI */ - { Int n_pushes; - Addr eipS, eipE; - UChar ch; - if (sz != 4) goto normal_push_case; - if (VG_(clo_cachesim)) goto normal_push_case; - /* eip points at first push insn + 1. Make eipS and eipE - bracket the sequence. */ - eipE = eipS = eip - 1; - while (True) { - ch = getUChar(eipE+1); - if (ch < 0x50 || ch > 0x57 || ch == 0x54) break; - eipE++; - } - n_pushes = eipE - eipS + 1; - if (0 && n_pushes > 1) VG_(printf)("%d pushes\n", n_pushes); - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); - uInstr2(cb, MOV, 4, TempReg, t1, TempReg, t2); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, 4 * n_pushes); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); - for (; eipS <= eipE; eipS++) { - ch = getUChar(eipS); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 4); - uInstr2(cb, GET, 4, ArchReg, ch-0x50, TempReg, t3); - uInstr2(cb, STORE, 4, TempReg, t3, TempReg, t1); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("pushl %s\n", nameIReg(4,ch-0x50)); - } - eip = eipE + 1; - break; - } - - case 0x54: /* PUSH eSP */ - normal_push_case: - /* This is the Right Way, in that the value to be pushed is - established before %esp is changed, so that pushl %esp - correctly pushes the old value. 
*/ - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - uInstr2(cb, GET, sz, ArchReg, opc-0x50, TempReg, t1); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("push%c %s\n", nameISize(sz), nameIReg(sz,opc-0x50)); - break; - - case 0x68: /* PUSH Iv */ - d32 = getUDisp(sz,eip); eip += sz; - goto do_push_I; - case 0x6A: /* PUSH Ib, sign-extended to sz */ - d32 = getSDisp8(eip); eip += 1; - goto do_push_I; - do_push_I: - t1 = newTemp(cb); t2 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t1); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t1); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_ESP); - uInstr2(cb, MOV, sz, Literal, 0, TempReg, t2); - uLiteral(cb, d32); - uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t1); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("push%c $0x%x\n", nameISize(sz), d32); - break; - - case 0x9C: /* PUSHF */ - vg_assert(sz == 2 || sz == 4); - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - uInstr1(cb, GETF, sz, TempReg, t1); - /* GETF reads all the flags we are interested in */ - uFlagsRWU(cb, FlagsALL, FlagsEmpty, FlagsEmpty); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_ESP); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - if (dis) - VG_(printf)("pushf%c\n", nameISize(sz)); - break; - - case 0x60: /* PUSHA */ - { Int reg; - /* Just to keep things sane, we assert for a size 4. It's - probably OK for size 2 as well, but I'd like to find a test - case; ie, have the assertion fail, before committing to it. 
- If it fails for you, uncomment the sz == 2 bit, try again, - and let me know whether or not it works. (jseward@acm.org). */ - vg_assert(sz == 4 /* || sz == 2 */); - - /* This is the Right Way, in that the value to be pushed is - established before %esp is changed, so that pusha - correctly pushes the old %esp value. New value of %esp is - pushed at start. */ - t1 = newTemp(cb); t2 = newTemp(cb); t3 = newTemp(cb); - t4 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, R_ESP, TempReg, t3); - uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t2); - uInstr2(cb, MOV, 4, TempReg, t3, TempReg, t4); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t4); - uLiteral(cb, sz * 8); /* One 'sz' per register. */ - uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_ESP); - /* Do %eax, %ecx, %edx, %ebx */ - for (reg = 0; reg <= 3; reg++) { - uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - } - /* Push old value of %esp */ - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, STORE, sz, TempReg, t3, TempReg, t2); - SMC_IF_ALL(cb); - /* Do %ebp, %esi, %edi */ - for (reg = 5; reg <= 7; reg++) { - uInstr2(cb, GET, sz, ArchReg, reg, TempReg, t1); - uInstr2(cb, SUB, 4, Literal, 0, TempReg, t2); - uLiteral(cb, sz); - uInstr2(cb, STORE, sz, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - } - if (dis) - VG_(printf)("pusha%c\n", nameISize(sz)); - break; - } - - /* ------------------------ SCAS et al ----------------- */ - - case 0xA4: /* MOVSb, no REP prefix */ - codegen_MOVS ( cb, 1 ); - if (dis) VG_(printf)("movsb\n"); - break; - case 0xA5: /* MOVSv, no REP prefix */ - codegen_MOVS ( cb, sz ); - if (dis) VG_(printf)("movs%c\n", nameISize(sz)); - break; - - case 0xA6: /* CMPSb, no REP prefix */ - codegen_CMPS ( cb, 1 ); - if (dis) VG_(printf)("cmpsb\n"); - break; - - case 0xAA: /* STOSb, no REP prefix */ - codegen_STOS ( cb, 1 ); - if (dis) 
VG_(printf)("stosb\n"); - break; - case 0xAB: /* STOSv, no REP prefix */ - codegen_STOS ( cb, sz ); - if (dis) VG_(printf)("stos%c\n", nameISize(sz)); - break; - - case 0xAC: /* LODSb, no REP prefix */ - codegen_LODS ( cb, 1 ); - if (dis) VG_(printf)("lodsb\n"); - break; - case 0xAD: /* LODSv, no REP prefix */ - codegen_LODS ( cb, sz ); - if (dis) VG_(printf)("lods%c\n", nameISize(sz)); - break; - - case 0xAE: /* SCASb, no REP prefix */ - codegen_SCAS ( cb, 1 ); - if (dis) VG_(printf)("scasb\n"); - break; - - case 0xFC: /* CLD */ - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CLD)); - uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("cld\n"); - break; - - case 0xFD: /* STD */ - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_STD)); - uFlagsRWU(cb, FlagsEmpty, FlagD, FlagsEmpty); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("std\n"); - break; - - case 0xF8: /* CLC */ - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CLC)); - uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZAP); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("clc\n"); - break; - - case 0xF9: /* STC */ - uInstr0(cb, CALLM_S, 0); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_STC)); - uFlagsRWU(cb, FlagsEmpty, FlagC, FlagsOSZCP); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("stc\n"); - break; - - case 0xF2: { /* REPNE prefix insn */ - Addr eip_orig = eip - 1; - abyte = getUChar(eip); eip++; - if (abyte == 0x66) { sz = 2; abyte = getUChar(eip); eip++; } - - if (abyte == 0xAE || 0xAF) { /* REPNE SCAS */ - if (abyte == 0xAE) sz = 1; - codegen_REPNE_SCAS ( cb, sz, eip_orig, eip ); - *isEnd = True; - if (dis) VG_(printf)("repne scas%c\n", nameISize(sz)); - } - else { - VG_(printf)("REPNE then 0x%x\n", (UInt)abyte); - VG_(panic)("Unhandled REPNE case"); - } - break; - } - - case 0xF3: { /* REPE prefix insn */ - Addr eip_orig = eip - 1; - abyte = getUChar(eip); eip++; - if (abyte == 0x66) { 
sz = 2; abyte = getUChar(eip); eip++; } - - if (abyte == 0xA4 || abyte == 0xA5) { /* REPE MOV */ - if (abyte == 0xA4) sz = 1; - codegen_REPE_MOVS ( cb, sz, eip_orig, eip ); - *isEnd = True; - if (dis) VG_(printf)("repe mov%c\n", nameISize(sz)); - } - else - if (abyte == 0xA6 || abyte == 0xA7) { /* REPE CMP */ - if (abyte == 0xA6) sz = 1; - codegen_REPE_CMPS ( cb, sz, eip_orig, eip ); - *isEnd = True; - if (dis) VG_(printf)("repe cmps%c\n", nameISize(sz)); - } - else - if (abyte == 0xAA || abyte == 0xAB) { /* REPE STOS */ - if (abyte == 0xAA) sz = 1; - codegen_REPE_STOS ( cb, sz, eip_orig, eip ); - *isEnd = True; - if (dis) VG_(printf)("repe stos%c\n", nameISize(sz)); - } - else - if (abyte == 0x90) { /* REPE NOP (PAUSE) */ - if (dis) VG_(printf)("repe nop (P4 pause)\n"); - /* do nothing; apparently a hint to the P4 re spin-wait loop */ - } else { - VG_(printf)("REPE then 0x%x\n", (UInt)abyte); - VG_(panic)("Unhandled REPE case"); - } - break; - } - - /* ------------------------ XCHG ----------------------- */ - - case 0x86: /* XCHG Gb,Eb */ - sz = 1; - /* Fall through ... 
*/ - case 0x87: /* XCHG Gv,Ev */ - modrm = getUChar(eip); - t1 = newTemp(cb); t2 = newTemp(cb); - if (epartIsReg(modrm)) { - uInstr2(cb, GET, sz, ArchReg, eregOfRM(modrm), TempReg, t1); - uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); - uInstr2(cb, PUT, sz, TempReg, t2, ArchReg, eregOfRM(modrm)); - eip++; - if (dis) - VG_(printf)("xchg%c %s, %s\n", nameISize(sz), - nameIReg(sz,gregOfRM(modrm)), - nameIReg(sz,eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL); - t3 = LOW24(pair); - uInstr2(cb, LOAD, sz, TempReg, t3, TempReg, t1); - uInstr2(cb, GET, sz, ArchReg, gregOfRM(modrm), TempReg, t2); - uInstr2(cb, STORE, sz, TempReg, t2, TempReg, t3); - SMC_IF_SOME(cb); - uInstr2(cb, PUT, sz, TempReg, t1, ArchReg, gregOfRM(modrm)); - eip += HI8(pair); - if (dis) - VG_(printf)("xchg%c %s, %s\n", nameISize(sz), - nameIReg(sz,gregOfRM(modrm)), - dis_buf); - } - break; - - case 0x90: /* XCHG eAX,eAX */ - if (dis) VG_(printf)("nop\n"); - break; - case 0x91: /* XCHG eCX,eSI */ - case 0x96: /* XCHG eAX,eSI */ - case 0x97: /* XCHG eAX,eDI */ - codegen_xchg_eAX_Reg ( cb, sz, opc - 0x90 ); - break; - - /* ------------------------ (Grp1 extensions) ---------- */ - - case 0x80: /* Grp1 Ib,Eb */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - sz = 1; - d_sz = 1; - d32 = getSDisp8(eip + am_sz); - eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); - break; - - case 0x81: /* Grp1 Iv,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = sz; - d32 = getUDisp(d_sz, eip + am_sz); - eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); - break; - - case 0x83: /* Grp1 Ib,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = 1; - d32 = getSDisp8(eip + am_sz); - eip = dis_Grp1 ( cb, eip, modrm, am_sz, d_sz, sz, d32 ); - break; - - /* ------------------------ (Grp2 extensions) ---------- */ - - case 0xC0: /* Grp2 Ib,Eb */ - modrm = getUChar(eip); - am_sz 
= lengthAMode(eip); - d_sz = 1; - d32 = getSDisp8(eip + am_sz); - sz = 1; - eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); - break; - - case 0xC1: /* Grp2 Ib,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = 1; - d32 = getSDisp8(eip + am_sz); - eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); - break; - - case 0xD0: /* Grp2 1,Eb */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = 0; - d32 = 1; - sz = 1; - eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); - break; - - case 0xD1: /* Grp2 1,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = 0; - d32 = 1; - eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, Literal, d32 ); - break; - - case 0xD3: /* Grp2 CL,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d_sz = 0; - eip = dis_Grp2 ( cb, eip, modrm, am_sz, d_sz, sz, ArchReg, R_ECX ); - break; - - /* ------------------------ (Grp3 extensions) ---------- */ - - case 0xF6: /* Grp3 Eb */ - eip = dis_Grp3 ( cb, 1, eip ); - break; - case 0xF7: /* Grp3 Ev */ - eip = dis_Grp3 ( cb, sz, eip ); - break; - - /* ------------------------ (Grp4 extensions) ---------- */ - - case 0xFE: /* Grp4 Eb */ - eip = dis_Grp4 ( cb, eip ); - break; - - /* ------------------------ (Grp5 extensions) ---------- */ - - case 0xFF: /* Grp5 Ev */ - eip = dis_Grp5 ( cb, sz, eip, isEnd ); - break; - - /* ------------------------ Escapes to 2-byte opcodes -- */ - - case 0x0F: { - opc = getUChar(eip); eip++; - switch (opc) { - - /* =-=-=-=-=-=-=-=-=- Grp8 =-=-=-=-=-=-=-=-=-=-=-= */ - - case 0xBA: /* Grp8 Ib,Ev */ - modrm = getUChar(eip); - am_sz = lengthAMode(eip); - d32 = getSDisp8(eip + am_sz); - eip = dis_Grp8_BT ( cb, eip, modrm, am_sz, sz, d32 ); - break; - - /* =-=-=-=-=-=-=-=-=- BSF/BSR -=-=-=-=-=-=-=-=-=-= */ - - case 0xBC: /* BSF Gv,Ev */ - eip = dis_bs_E_G ( cb, sz, eip, True ); - break; - case 0xBD: /* BSR Gv,Ev */ - eip = dis_bs_E_G ( cb, sz, eip, False ); - break; - - /* 
=-=-=-=-=-=-=-=-=- BSWAP -=-=-=-=-=-=-=-=-=-=-= */ - - case 0xC8: /* BSWAP %eax */ - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: /* BSWAP %edi */ - /* AFAICS from the Intel docs, this only exists at size 4. */ - vg_assert(sz == 4); - t1 = newTemp(cb); - uInstr2(cb, GET, 4, ArchReg, opc-0xC8, TempReg, t1); - uInstr1(cb, BSWAP, 4, TempReg, t1); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, opc-0xC8); - if (dis) VG_(printf)("bswapl %s\n", nameIReg(4, opc-0xC8)); - break; - - /* =-=-=-=-=-=-=-=-=- BT/BTS/BTR/BTC =-=-=-=-=-=-= */ - - case 0xA3: /* BT Gv,Ev */ - eip = dis_bt_G_E ( cb, sz, eip, BtOpNone ); - break; - case 0xB3: /* BTR Gv,Ev */ - eip = dis_bt_G_E ( cb, sz, eip, BtOpReset ); - break; - case 0xAB: /* BTS Gv,Ev */ - eip = dis_bt_G_E ( cb, sz, eip, BtOpSet ); - break; - case 0xBB: /* BTC Gv,Ev */ - eip = dis_bt_G_E ( cb, sz, eip, BtOpComp ); - break; - - /* =-=-=-=-=-=-=-=-=- CMOV =-=-=-=-=-=-=-=-=-=-=-= */ - - case 0x40: - case 0x41: - case 0x42: /* CMOVBb/CMOVNAEb (cmov below) */ - case 0x43: /* CMOVNBb/CMOVAEb (cmov not below) */ - case 0x44: /* CMOVZb/CMOVEb (cmov zero) */ - case 0x45: /* CMOVNZb/CMOVNEb (cmov not zero) */ - case 0x46: /* CMOVBEb/CMOVNAb (cmov below or equal) */ - case 0x47: /* CMOVNBEb/CMOVAb (cmov not below or equal) */ - case 0x48: /* CMOVSb (cmov negative) */ - case 0x49: /* CMOVSb (cmov not negative) */ - case 0x4A: /* CMOVP (cmov parity even) */ - case 0x4B: /* CMOVNP (cmov parity odd) */ - case 0x4C: /* CMOVLb/CMOVNGEb (cmov less) */ - case 0x4D: /* CMOVGEb/CMOVNLb (cmov greater or equal) */ - case 0x4E: /* CMOVLEb/CMOVNGb (cmov less or equal) */ - case 0x4F: /* CMOVGb/CMOVNLEb (cmov greater) */ - eip = dis_cmov_E_G(cb, sz, (Condcode)(opc - 0x40), eip); - break; - - /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ - - case 0xB1: /* CMPXCHG Gv,Ev */ - eip = dis_cmpxchg_G_E ( cb, sz, eip ); - break; - - /* =-=-=-=-=-=-=-=-=- CPUID -=-=-=-=-=-=-=-=-=-=-= */ - - case 0xA2: /* CPUID */ - t1 
= newTemp(cb); - t2 = newTemp(cb); - t3 = newTemp(cb); - t4 = newTemp(cb); - uInstr0(cb, CALLM_S, 0); - - uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t1); - uInstr1(cb, PUSH, 4, TempReg, t1); - - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t2); - - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t3); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t3); - - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t4); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t4); - - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_CPUID)); - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); - - uInstr1(cb, POP, 4, TempReg, t4); - uInstr2(cb, PUT, 4, TempReg, t4, ArchReg, R_EDX); - - uInstr1(cb, POP, 4, TempReg, t3); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_ECX); - - uInstr1(cb, POP, 4, TempReg, t2); - uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX); - - uInstr1(cb, POP, 4, TempReg, t1); - uInstr2(cb, PUT, 4, TempReg, t1, ArchReg, R_EAX); - - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("cpuid\n"); - break; - - /* =-=-=-=-=-=-=-=-=- MOVZX, MOVSX =-=-=-=-=-=-=-= */ - - case 0xB6: /* MOVZXb Eb,Gv */ - eip = dis_movx_E_G ( cb, eip, 1, 4, False ); - break; - case 0xB7: /* MOVZXw Ew,Gv */ - eip = dis_movx_E_G ( cb, eip, 2, 4, False ); - break; - - case 0xBE: /* MOVSXb Eb,Gv */ - eip = dis_movx_E_G ( cb, eip, 1, 4, True ); - break; - case 0xBF: /* MOVSXw Ew,Gv */ - eip = dis_movx_E_G ( cb, eip, 2, 4, True ); - break; - - /* =-=-=-=-=-=-=-=-=- MUL/IMUL =-=-=-=-=-=-=-=-=-= */ - - case 0xAF: /* IMUL Ev, Gv */ - eip = dis_mul_E_G ( cb, sz, eip, True ); - break; - - /* =-=-=-=-=-=-=-=-=- Jcond d32 -=-=-=-=-=-=-=-=-= */ - case 0x80: - case 0x81: - case 0x82: /* JBb/JNAEb (jump below) */ - case 0x83: /* JNBb/JAEb (jump not below) */ - case 0x84: /* JZb/JEb (jump zero) */ - case 0x85: /* JNZb/JNEb (jump not zero) */ - case 0x86: /* JBEb/JNAb (jump below or equal) */ - case 0x87: /* JNBEb/JAb (jump not below or equal) */ - case 0x88: /* JSb (jump 
negative) */ - case 0x89: /* JSb (jump not negative) */ - case 0x8A: /* JP (jump parity even) */ - case 0x8B: /* JNP/JPO (jump parity odd) */ - case 0x8C: /* JLb/JNGEb (jump less) */ - case 0x8D: /* JGEb/JNLb (jump greater or equal) */ - case 0x8E: /* JLEb/JNGb (jump less or equal) */ - case 0x8F: /* JGb/JNLEb (jump greater) */ - d32 = (eip+4) + getUDisp32(eip); eip += 4; - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, d32); - uCond(cb, (Condcode)(opc - 0x80)); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - *isEnd = True; - if (dis) - VG_(printf)("j%s-32 0x%x\n", - VG_(nameCondcode)(opc - 0x80), d32); - break; - - /* =-=-=-=-=-=-=-=-=- RDTSC -=-=-=-=-=-=-=-=-=-=-= */ - - case 0x31: /* RDTSC */ - t1 = newTemp(cb); - t2 = newTemp(cb); - t3 = newTemp(cb); - uInstr0(cb, CALLM_S, 0); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t1); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t1); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t2); - uLiteral(cb, 0); - uInstr1(cb, PUSH, 4, TempReg, t2); - uInstr1(cb, CALLM, 0, Lit16, VGOFF_(helper_RDTSC)); - uFlagsRWU(cb, FlagsEmpty, FlagsEmpty, FlagsEmpty); - uInstr1(cb, POP, 4, TempReg, t3); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EDX); - uInstr1(cb, POP, 4, TempReg, t3); - uInstr2(cb, PUT, 4, TempReg, t3, ArchReg, R_EAX); - uInstr0(cb, CALLM_E, 0); - if (dis) VG_(printf)("rdtsc\n"); - break; - - /* =-=-=-=-=-=-=-=-=- SETcc Eb =-=-=-=-=-=-=-=-=-= */ - case 0x90: - case 0x91: - case 0x92: /* set-Bb/set-NAEb (jump below) */ - case 0x93: /* set-NBb/set-AEb (jump not below) */ - case 0x94: /* set-Zb/set-Eb (jump zero) */ - case 0x95: /* set-NZb/set-NEb (jump not zero) */ - case 0x96: /* set-BEb/set-NAb (jump below or equal) */ - case 0x97: /* set-NBEb/set-Ab (jump not below or equal) */ - case 0x98: /* set-Sb (jump negative) */ - case 0x99: /* set-Sb (jump not negative) */ - case 0x9A: /* set-P (jump parity even) */ - case 0x9B: /* set-NP 
(jump parity odd) */ - case 0x9C: /* set-Lb/set-NGEb (jump less) */ - case 0x9D: /* set-GEb/set-NLb (jump greater or equal) */ - case 0x9E: /* set-LEb/set-NGb (jump less or equal) */ - case 0x9F: /* set-Gb/set-NLEb (jump greater) */ - modrm = getUChar(eip); - t1 = newTemp(cb); - if (epartIsReg(modrm)) { - eip++; - uInstr1(cb, CC2VAL, 1, TempReg, t1); - uCond(cb, (Condcode)(opc-0x90)); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr2(cb, PUT, 1, TempReg, t1, ArchReg, eregOfRM(modrm)); - if (dis) VG_(printf)("set%s %s\n", - VG_(nameCondcode)(opc-0x90), - nameIReg(1,eregOfRM(modrm))); - } else { - pair = disAMode ( cb, eip, dis?dis_buf:NULL ); - t2 = LOW24(pair); - eip += HI8(pair); - uInstr1(cb, CC2VAL, 1, TempReg, t1); - uCond(cb, (Condcode)(opc-0x90)); - uFlagsRWU(cb, FlagsOSZACP, FlagsEmpty, FlagsEmpty); - uInstr2(cb, STORE, 1, TempReg, t1, TempReg, t2); - SMC_IF_ALL(cb); - if (dis) VG_(printf)("set%s %s\n", - VG_(nameCondcode)(opc-0x90), - dis_buf); - } - break; - - /* =-=-=-=-=-=-=-=-=- SHLD/SHRD -=-=-=-=-=-=-=-=-= */ - - case 0xA4: /* SHLDv imm8,Gv,Ev */ - modrm = getUChar(eip); - eip = dis_SHLRD_Gv_Ev ( - cb, eip, modrm, sz, - Literal, getUChar(eip + lengthAMode(eip)), - True ); - break; - case 0xA5: /* SHLDv %cl,Gv,Ev */ - modrm = getUChar(eip); - eip = dis_SHLRD_Gv_Ev ( - cb, eip, modrm, sz, ArchReg, R_CL, True ); - break; - - case 0xAC: /* SHRDv imm8,Gv,Ev */ - modrm = getUChar(eip); - eip = dis_SHLRD_Gv_Ev ( - cb, eip, modrm, sz, - Literal, getUChar(eip + lengthAMode(eip)), - False ); - break; - case 0xAD: /* SHRDv %cl,Gv,Ev */ - modrm = getUChar(eip); - eip = dis_SHLRD_Gv_Ev ( - cb, eip, modrm, sz, ArchReg, R_CL, False ); - break; - - /* =-=-=-=-=-=-=-=-=- CMPXCHG -=-=-=-=-=-=-=-=-=-= */ - - case 0xC1: /* XADD Gv,Ev */ - eip = dis_xadd_G_E ( cb, sz, eip ); - break; - - /* =-=-=-=-=-=-=-=-=- unimp2 =-=-=-=-=-=-=-=-=-=-= */ - - default: - VG_(printf)("disInstr: unhandled 2-byte opcode 0x%x\n", - (UInt)opc); - VG_(printf)("This _might_ be the 
result of executing an " - "MMX, SSE, SSE2 or 3DNow!\n" ); - VG_(printf)("instruction. Valgrind does not currently " - "support such instructions. Sorry.\n" ); - VG_(unimplemented)("unhandled x86 0x0F 2-byte opcode"); - } - - break; - } - - /* ------------------------ ??? ------------------------ */ - - default: - VG_(printf)("disInstr: unhandled opcode 0x%x then 0x%x\n", - (UInt)opc, (UInt)getUChar(eip)); - if (opc == 0x8C) - VG_(nvidia_moan)(); - VG_(panic)("unhandled x86 opcode"); - } - - if (dis) - VG_(printf)("\n"); - for (; first_uinstr < cb->used; first_uinstr++) { - Bool sane = VG_(saneUInstr)(True, &cb->instrs[first_uinstr]); - if (dis || !sane) - VG_(ppUInstr)(sane ? first_uinstr : -1, - &cb->instrs[first_uinstr]); - vg_assert(sane); - } - - return eip; -} - - -/* Disassemble a complete basic block, starting at eip, and dumping - the ucode into cb. Returns the size, in bytes, of the basic - block. */ - -Int VG_(disBB) ( UCodeBlock* cb, Addr eip0 ) -{ - Addr eip = eip0; - Bool isEnd = False; - Bool block_sane; - Int INCEIP_allowed_lag = 4; - Int delta = 0; - - if (dis) VG_(printf)("\n"); - - /* When cache simulating, to ensure cache misses are attributed to the - * correct line we ensure EIP is always correct. This is done by: - * - * a) Using eager INCEIP updating to cope with all instructions except those - * at the end of a basic block. - * - * b) Patching in the size of the original x86 instr in the `extra4b' field - * of JMPs at the end of a basic block. Two cases: - * - Jcond followed by Juncond: patch the Jcond - * - Juncond alone: patch the Juncond - * - * See vg_cachesim_instrument() for how this is used. - */ - if (VG_(clo_cachesim)) { - INCEIP_allowed_lag = 0; - } - - if (VG_(clo_single_step)) { - eip = disInstr ( cb, eip, &isEnd ); - - /* Add a JMP to the next (single x86 instruction) BB if it doesn't - * already end with a JMP instr. 
We also need to check for no UCode, - * which occurs if the x86 instr was a nop */ - if (cb->used == 0 || LAST_UINSTR(cb).opcode != JMP) { - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); - } - delta = eip - eip0; - - } else { - Addr eip2; - while (!isEnd) { - eip2 = disInstr ( cb, eip, &isEnd ); - delta += (eip2 - eip); - eip = eip2; - /* Split up giant basic blocks into pieces, so the - translations fall within 64k. */ - if (eip - eip0 > 2000 && !isEnd) { - if (VG_(clo_verbosity) > 2) - VG_(message)(Vg_DebugMsg, - "Warning: splitting giant basic block into pieces"); - uInstr1(cb, JMP, 0, Literal, 0); - uLiteral(cb, eip); - uCond(cb, CondAlways); - if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); - isEnd = True; - - } else if (delta > INCEIP_allowed_lag && !isEnd) { - uInstr1(cb, INCEIP, 0, Lit16, delta); - if (dis) VG_(ppUInstr)(cb->used-1, &cb->instrs[cb->used-1]); - delta = 0; - } - if (dis) VG_(printf)("\n"); - } - } - if (VG_(clo_cachesim)) { - /* Patch instruction size into earliest JMP. 
*/ - if (cb->used >= 2 && JMP == cb->instrs[cb->used - 2].opcode) { - cb->instrs[cb->used - 2].extra4b = delta; - } else { - LAST_UINSTR(cb).extra4b = delta; - } - } - - block_sane = VG_(saneUCodeBlock)(cb); - if (!block_sane) { - VG_(ppUCodeBlock)(cb, "block failing sanity check"); - vg_assert(block_sane); - } - - return eip - eip0; -} - - -/*--------------------------------------------------------------------*/ -/*--- end vg_to_ucode.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c deleted file mode 100644 index 27a02754cf..0000000000 --- a/coregrind/vg_translate.c +++ /dev/null @@ -1,3172 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- The JITter proper: register allocation & code improvement ---*/ -/*--- vg_translate.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. 
-*/ - -#include "vg_include.h" - - -/*------------------------------------------------------------*/ -/*--- Renamings of frequently-used global functions. ---*/ -/*------------------------------------------------------------*/ - -#define uInstr1 VG_(newUInstr1) -#define uInstr2 VG_(newUInstr2) -#define uInstr3 VG_(newUInstr3) -#define dis VG_(disassemble) -#define nameIReg VG_(nameOfIntReg) -#define nameISize VG_(nameOfIntSize) -#define uLiteral VG_(setLiteralField) -#define newTemp VG_(getNewTemp) -#define newShadow VG_(getNewShadow) - - -/*------------------------------------------------------------*/ -/*--- Memory management for the translater. ---*/ -/*------------------------------------------------------------*/ - -#define N_JITBLOCKS 4 -#define N_JITBLOCK_SZ 5000 - -static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ]; -static Bool jitstorage_inuse[N_JITBLOCKS]; -static Bool jitstorage_initdone = False; - -static __inline__ void jitstorage_initialise ( void ) -{ - Int i; - if (jitstorage_initdone) return; - jitstorage_initdone = True; - for (i = 0; i < N_JITBLOCKS; i++) - jitstorage_inuse[i] = False; -} - -void* VG_(jitmalloc) ( Int nbytes ) -{ - Int i; - jitstorage_initialise(); - if (nbytes > N_JITBLOCK_SZ) { - /* VG_(printf)("too large: %d\n", nbytes); */ - return VG_(malloc)(VG_AR_PRIVATE, nbytes); - } - for (i = 0; i < N_JITBLOCKS; i++) { - if (!jitstorage_inuse[i]) { - jitstorage_inuse[i] = True; - /* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */ - return & jitstorage[i][0]; - } - } - VG_(panic)("out of slots in vg_jitmalloc\n"); - return VG_(malloc)(VG_AR_PRIVATE, nbytes); -} - -void VG_(jitfree) ( void* ptr ) -{ - Int i; - jitstorage_initialise(); - for (i = 0; i < N_JITBLOCKS; i++) { - if (ptr == & jitstorage[i][0]) { - vg_assert(jitstorage_inuse[i]); - jitstorage_inuse[i] = False; - return; - } - } - VG_(free)(VG_AR_PRIVATE, ptr); -} - -/*------------------------------------------------------------*/ -/*--- Basics ---*/ 
-/*------------------------------------------------------------*/ - -UCodeBlock* VG_(allocCodeBlock) ( void ) -{ - UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock)); - cb->used = cb->size = cb->nextTemp = 0; - cb->instrs = NULL; - return cb; -} - - -void VG_(freeCodeBlock) ( UCodeBlock* cb ) -{ - if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs); - VG_(free)(VG_AR_PRIVATE, cb); -} - - -/* Ensure there's enough space in a block to add one uinstr. */ -static __inline__ -void ensureUInstr ( UCodeBlock* cb ) -{ - if (cb->used == cb->size) { - if (cb->instrs == NULL) { - vg_assert(cb->size == 0); - vg_assert(cb->used == 0); - cb->size = 8; - cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr)); - } else { - Int i; - UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE, - 2 * sizeof(UInstr) * cb->size); - for (i = 0; i < cb->used; i++) - instrs2[i] = cb->instrs[i]; - cb->size *= 2; - VG_(free)(VG_AR_PRIVATE, cb->instrs); - cb->instrs = instrs2; - } - } - - vg_assert(cb->used < cb->size); -} - - -__inline__ -void VG_(emptyUInstr) ( UInstr* u ) -{ - u->val1 = u->val2 = u->val3 = 0; - u->tag1 = u->tag2 = u->tag3 = NoValue; - u->flags_r = u->flags_w = FlagsEmpty; - u->jmpkind = JmpBoring; - u->smc_check = u->signed_widen = False; - u->lit32 = 0; - u->opcode = 0; - u->size = 0; - u->cond = 0; - u->extra4b = 0; -} - - -/* Add an instruction to a ucode block, and return the index of the - instruction. 
*/ -__inline__ -void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1, - Tag tag2, UInt val2, - Tag tag3, UInt val3 ) -{ - UInstr* ui; - ensureUInstr(cb); - ui = & cb->instrs[cb->used]; - cb->used++; - VG_(emptyUInstr)(ui); - ui->val1 = val1; - ui->val2 = val2; - ui->val3 = val3; - ui->opcode = opcode; - ui->tag1 = tag1; - ui->tag2 = tag2; - ui->tag3 = tag3; - ui->size = sz; - if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); - if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG); - if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG); -} - - -__inline__ -void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1, - Tag tag2, UInt val2 ) -{ - UInstr* ui; - ensureUInstr(cb); - ui = & cb->instrs[cb->used]; - cb->used++; - VG_(emptyUInstr)(ui); - ui->val1 = val1; - ui->val2 = val2; - ui->opcode = opcode; - ui->tag1 = tag1; - ui->tag2 = tag2; - ui->size = sz; - if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); - if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG); -} - - -__inline__ -void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz, - Tag tag1, UInt val1 ) -{ - UInstr* ui; - ensureUInstr(cb); - ui = & cb->instrs[cb->used]; - cb->used++; - VG_(emptyUInstr)(ui); - ui->val1 = val1; - ui->opcode = opcode; - ui->tag1 = tag1; - ui->size = sz; - if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG); -} - - -__inline__ -void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz ) -{ - UInstr* ui; - ensureUInstr(cb); - ui = & cb->instrs[cb->used]; - cb->used++; - VG_(emptyUInstr)(ui); - ui->opcode = opcode; - ui->size = sz; -} - -/* Copy an instruction into the given codeblock. */ -__inline__ -void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr ) -{ - ensureUInstr(cb); - cb->instrs[cb->used] = *instr; - cb->used++; -} - -/* Copy auxiliary info from one uinstr to another. 
*/ -static __inline__ -void copyAuxInfoFromTo ( UInstr* src, UInstr* dst ) -{ - dst->cond = src->cond; - dst->extra4b = src->extra4b; - dst->smc_check = src->smc_check; - dst->signed_widen = src->signed_widen; - dst->jmpkind = src->jmpkind; - dst->flags_r = src->flags_r; - dst->flags_w = src->flags_w; -} - - -/* Set the flag R/W sets on a uinstr. */ -void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw ) -{ - /* VG_(ppUInstr)(-1,u); */ - vg_assert(fr == (fr & FlagsALL)); - vg_assert(fw == (fw & FlagsALL)); - u->flags_r = fr; - u->flags_w = fw; -} - - -/* Set the lit32 field of the most recent uinsn. */ -void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 ) -{ - LAST_UINSTR(cb).lit32 = lit32; -} - - -Bool VG_(anyFlagUse) ( UInstr* u ) -{ - return (u->flags_r != FlagsEmpty - || u->flags_w != FlagsEmpty); -} - - - - -/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel - register number. This effectively defines the order in which real - registers are allocated. %ebp is excluded since it is permanently - reserved for pointing at VG_(baseBlock). %edi is a general spare - temp used for Left4 and various misc tag ops. - - Important! If you change the set of allocatable registers from - %eax, %ebx, %ecx, %edx, %esi you must change the - save/restore sequences in various places to match! -*/ -__inline__ Int VG_(rankToRealRegNo) ( Int rank ) -{ - switch (rank) { -# if 1 - /* Probably the best allocation ordering. */ - case 0: return R_EAX; - case 1: return R_EBX; - case 2: return R_ECX; - case 3: return R_EDX; - case 4: return R_ESI; -# else - /* Contrary; probably the worst. Helpful for debugging, tho. */ - case 4: return R_EAX; - case 3: return R_EBX; - case 2: return R_ECX; - case 1: return R_EDX; - case 0: return R_ESI; -# endif - default: VG_(panic)("rankToRealRegNo"); - } -} - - -/*------------------------------------------------------------*/ -/*--- Sanity checking uinstrs. 
---*/ -/*------------------------------------------------------------*/ - -/* This seems as good a place as any to record some important stuff - about ucode semantics. - - * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a - TempReg are defined to zero-extend the loaded value to 32 bits. - This is needed to make the translation of movzbl et al work - properly. - - * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended. - - * Arithmetic on TempRegs is at the specified size. For example, - SUBW t1, t2 has to result in a real 16 bit x86 subtraction - being emitted -- not a 32 bit one. - - * On some insns we allow the cc bit to be set. If so, the - intention is that the simulated machine's %eflags register - is copied into that of the real machine before the insn, - and copied back again afterwards. This means that the - code generated for that insn must be very careful only to - update %eflags in the intended way. This is particularly - important for the routines referenced by CALL insns. -*/ - -/* Meaning of operand kinds is as follows: - - ArchReg is a register of the simulated CPU, stored in memory, - in vg_m_state.m_eax .. m_edi. These values are stored - using the Intel register encoding. - - RealReg is a register of the real CPU. There are VG_MAX_REALREGS - available for allocation. As with ArchRegs, these values - are stored using the Intel register encoding. - - TempReg is a temporary register used to express the results of - disassembly. There is an unlimited supply of them -- - register allocation and spilling eventually assigns them - to RealRegs. - - SpillNo is a spill slot number. The number of required spill - slots is VG_MAX_PSEUDOS, in general. Only allowed - as the ArchReg operand of GET and PUT. - - Lit16 is a signed 16-bit literal value. - - Literal is a 32-bit literal value. Each uinstr can only hold - one of these. - - The disassembled code is expressed purely in terms of ArchReg, - TempReg and Literal operands. 
Eventually, register allocation - removes all the TempRegs, giving a result using ArchRegs, RealRegs, - and Literals. New x86 code can easily be synthesised from this. - There are carefully designed restrictions on which insns can have - which operands, intended to make it possible to generate x86 code - from the result of register allocation on the ucode efficiently and - without need of any further RealRegs. - - Restrictions on insns (as generated by the disassembler) are as - follows: - - A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg - N=NoValue - - GETF T N N - PUTF T N N - - GET A,S T N - PUT T A,S N - LOAD T T N - STORE T T N - MOV T,L T N - CMOV T T N - WIDEN T N N - JMP T,L N N - CALLM L N N - CALLM_S N N N - CALLM_E N N N - PUSH,POP T N N - CLEAR L N N - - AND, OR - T T N - - ADD, ADC, XOR, SUB, SBB - A,L,T T N - - SHL, SHR, SAR, ROL, ROR, RCL, RCR - L,T T N - - NOT, NEG, INC, DEC, CC2VAL, BSWAP - T N N - - JIFZ T L N - - FPU_R L T N - FPU_W L T N - FPU L T N - - LEA1 T T (const in a seperate field) - LEA2 T T T (const & shift ditto) - - INCEIP L N N - - and for instrumentation insns: - - LOADV T T N - STOREV T,L T N - GETV A T N - PUTV T,L A N - GETVF T N N - PUTVF T N N - WIDENV T N N - TESTV A,T N N - SETV A,T N N - TAG1 T N N - TAG2 T T N - - Before register allocation, S operands should not appear anywhere. - After register allocation, all T operands should have been - converted into Rs, and S operands are allowed in GET and PUT -- - denoting spill saves/restores. - - The size field should be 0 for insns for which it is meaningless, - ie those which do not directly move/operate on data. -*/ -Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u ) -{ -# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg)) -# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg)) -# define TR3 (beforeRA ? 
(u->tag3 == TempReg) : (u->tag3 == RealReg)) -# define A1 (u->tag1 == ArchReg) -# define A2 (u->tag2 == ArchReg) -# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo)))) -# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo)))) -# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo)))) -# define L1 (u->tag1 == Literal && u->val1 == 0) -# define L2 (u->tag2 == Literal && u->val2 == 0) -# define Ls1 (u->tag1 == Lit16) -# define Ls3 (u->tag3 == Lit16) -# define N1 (u->tag1 == NoValue) -# define N2 (u->tag2 == NoValue) -# define N3 (u->tag3 == NoValue) -# define SZ4 (u->size == 4) -# define SZ2 (u->size == 2) -# define SZ1 (u->size == 1) -# define SZ0 (u->size == 0) -# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) -# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty) -# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL) -# define FLG_RD_WR_MAYBE \ - ((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) \ - || (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP) \ - || (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty)) -# define CC1 (!(CC0)) -# define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \ - ? 
(u->size == 4) : True) - - Int n_lits = 0; - if (u->tag1 == Literal) n_lits++; - if (u->tag2 == Literal) n_lits++; - if (u->tag3 == Literal) n_lits++; - if (n_lits > 1) - return False; - - switch (u->opcode) { - case GETF: - return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD; - case PUTF: - return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR; - case CALLM_S: case CALLM_E: - return SZ0 && N1 && N2 && N3; - case INCEIP: - return SZ0 && CC0 && Ls1 && N2 && N3; - case LEA1: - return CC0 && TR1 && TR2 && N3 && SZ4; - case LEA2: - return CC0 && TR1 && TR2 && TR3 && SZ4; - case NOP: - return SZ0 && CC0 && N1 && N2 && N3; - case GET: - return CC0 && AS1 && TR2 && N3; - case PUT: - return CC0 && TR1 && AS2 && N3; - case LOAD: case STORE: - return CC0 && TR1 && TR2 && N3; - case MOV: - return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1; - case CMOV: - return CC1 && TR1 && TR2 && N3 && SZ4; - case JMP: - return (u->cond==CondAlways ? CC0 : CC1) - && (TR1 || L1) && N2 && SZ0 && N3; - case CLEAR: - return CC0 && Ls1 && N2 && SZ0 && N3; - case CALLM: - return SZ0 && Ls1 && N2 && N3; - case PUSH: case POP: - return CC0 && TR1 && N2 && N3; - case AND: case OR: - return TR1 && TR2 && N3; - case ADD: case ADC: case XOR: case SUB: case SBB: - return (A1 || TR1 || L1) && TR2 && N3; - case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR: - return (TR1 || L1) && TR2 && N3; - case NOT: case NEG: case INC: case DEC: - return TR1 && N2 && N3; - case BSWAP: - return TR1 && N2 && N3 && CC0 && SZ4; - case CC2VAL: - return CC1 && SZ1 && TR1 && N2 && N3; - case JIFZ: - return CC0 && SZ4 && TR1 && L2 && N3; - case FPU_R: case FPU_W: - return CC0 && Ls1 && TR2 && N3; - case FPU: - return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3; - case LOADV: - return CC0 && TR1 && TR2 && N3; - case STOREV: - return CC0 && (TR1 || L1) && TR2 && N3; - case GETV: - return CC0 && A1 && TR2 && N3; - case PUTV: - return CC0 && (TR1 || L1) && A2 && N3; - case GETVF: - return CC0 && TR1 && N2 && N3 && SZ0; 
- case PUTVF: - return CC0 && TR1 && N2 && N3 && SZ0; - case WIDEN: - return CC0 && TR1 && N2 && N3; - case TESTV: - return CC0 && (A1 || TR1) && N2 && N3; - case SETV: - return CC0 && (A1 || TR1) && N2 && N3; - case TAG1: - return CC0 && TR1 && N2 && Ls3 && SZ0; - case TAG2: - return CC0 && TR1 && TR2 && Ls3 && SZ0; - default: - VG_(panic)("vg_saneUInstr: unhandled opcode"); - } -# undef SZ4_IF_TR1 -# undef CC0 -# undef CC1 -# undef SZ4 -# undef SZ2 -# undef SZ1 -# undef SZ0 -# undef TR1 -# undef TR2 -# undef TR3 -# undef A1 -# undef A2 -# undef AS1 -# undef AS2 -# undef AS3 -# undef L1 -# undef Ls1 -# undef L2 -# undef Ls3 -# undef N1 -# undef N2 -# undef N3 -# undef FLG_RD -# undef FLG_WR -# undef FLG_RD_WR_MAYBE -} - - -/* Sanity checks to do with CALLMs in UCodeBlocks. */ -Bool VG_(saneUCodeBlock) ( UCodeBlock* cb ) -{ - Int callm = 0; - Int callm_s = 0; - Int callm_e = 0; - Int callm_ptr, calls_ptr; - Int i, j, t; - Bool incall = False; - - /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */ - - for (i = 0; i < cb->used; i++) { - switch (cb->instrs[i].opcode) { - case CALLM: - if (!incall) return False; - callm++; - break; - case CALLM_S: - if (incall) return False; - incall = True; - callm_s++; - break; - case CALLM_E: - if (!incall) return False; - incall = False; - callm_e++; - break; - case PUSH: case POP: case CLEAR: - if (!incall) return False; - break; - default: - break; - } - } - if (incall) return False; - if (callm != callm_s || callm != callm_e) return False; - - /* Check the sections between CALLM_S and CALLM's. Ensure that no - PUSH uinsn pushes any TempReg that any other PUSH in the same - section pushes. Ie, check that the TempReg args to PUSHes in - the section are unique. If not, the instrumenter generates - incorrect code for CALLM insns. */ - - callm_ptr = 0; - - find_next_CALLM: - /* Search for the next interval, making calls_ptr .. callm_ptr - bracket it. 
*/ - while (callm_ptr < cb->used - && cb->instrs[callm_ptr].opcode != CALLM) - callm_ptr++; - if (callm_ptr == cb->used) - return True; - vg_assert(cb->instrs[callm_ptr].opcode == CALLM); - - calls_ptr = callm_ptr - 1; - while (cb->instrs[calls_ptr].opcode != CALLM_S) - calls_ptr--; - vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S); - vg_assert(calls_ptr >= 0); - - /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */ - - /* For each PUSH insn in the interval ... */ - for (i = calls_ptr + 1; i < callm_ptr; i++) { - if (cb->instrs[i].opcode != PUSH) continue; - t = cb->instrs[i].val1; - /* Ensure no later PUSH insns up to callm_ptr push the same - TempReg. Return False if any such are found. */ - for (j = i+1; j < callm_ptr; j++) { - if (cb->instrs[j].opcode == PUSH && - cb->instrs[j].val1 == t) - return False; - } - } - - /* This interval is clean. Keep going ... */ - callm_ptr++; - goto find_next_CALLM; -} - - -/*------------------------------------------------------------*/ -/*--- Printing uinstrs. ---*/ -/*------------------------------------------------------------*/ - -Char* VG_(nameCondcode) ( Condcode cond ) -{ - switch (cond) { - case CondO: return "o"; - case CondNO: return "no"; - case CondB: return "b"; - case CondNB: return "nb"; - case CondZ: return "z"; - case CondNZ: return "nz"; - case CondBE: return "be"; - case CondNBE: return "nbe"; - case CondS: return "s"; - case ConsNS: return "ns"; - case CondP: return "p"; - case CondNP: return "np"; - case CondL: return "l"; - case CondNL: return "nl"; - case CondLE: return "le"; - case CondNLE: return "nle"; - case CondAlways: return "MP"; /* hack! 
*/ - default: VG_(panic)("nameCondcode"); - } -} - - -static void vg_ppFlagSet ( Char* prefix, FlagSet set ) -{ - VG_(printf)("%s", prefix); - if (set & FlagD) VG_(printf)("D"); - if (set & FlagO) VG_(printf)("O"); - if (set & FlagS) VG_(printf)("S"); - if (set & FlagZ) VG_(printf)("Z"); - if (set & FlagA) VG_(printf)("A"); - if (set & FlagC) VG_(printf)("C"); - if (set & FlagP) VG_(printf)("P"); -} - - -static void ppTempReg ( Int tt ) -{ - if ((tt & 1) == 0) - VG_(printf)("t%d", tt); - else - VG_(printf)("q%d", tt-1); -} - - -static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens ) -{ - UInt tag, val; - switch (operandNo) { - case 1: tag = u->tag1; val = u->val1; break; - case 2: tag = u->tag2; val = u->val2; break; - case 3: tag = u->tag3; val = u->val3; break; - default: VG_(panic)("ppUOperand(1)"); - } - if (tag == Literal) val = u->lit32; - - if (parens) VG_(printf)("("); - switch (tag) { - case TempReg: ppTempReg(val); break; - case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break; - case Literal: VG_(printf)("$0x%x", val); break; - case Lit16: VG_(printf)("$0x%x", val); break; - case NoValue: VG_(printf)("NoValue"); break; - case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break; - case SpillNo: VG_(printf)("spill%d", val); break; - default: VG_(panic)("ppUOperand(2)"); - } - if (parens) VG_(printf)(")"); -} - - -Char* VG_(nameUOpcode) ( Bool upper, Opcode opc ) -{ - switch (opc) { - case ADD: return (upper ? "ADD" : "add"); - case ADC: return (upper ? "ADC" : "adc"); - case AND: return (upper ? "AND" : "and"); - case OR: return (upper ? "OR" : "or"); - case XOR: return (upper ? "XOR" : "xor"); - case SUB: return (upper ? "SUB" : "sub"); - case SBB: return (upper ? "SBB" : "sbb"); - case SHL: return (upper ? "SHL" : "shl"); - case SHR: return (upper ? "SHR" : "shr"); - case SAR: return (upper ? "SAR" : "sar"); - case ROL: return (upper ? "ROL" : "rol"); - case ROR: return (upper ? "ROR" : "ror"); - case RCL: return (upper ? 
"RCL" : "rcl"); - case RCR: return (upper ? "RCR" : "rcr"); - case NOT: return (upper ? "NOT" : "not"); - case NEG: return (upper ? "NEG" : "neg"); - case INC: return (upper ? "INC" : "inc"); - case DEC: return (upper ? "DEC" : "dec"); - case BSWAP: return (upper ? "BSWAP" : "bswap"); - default: break; - } - if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper"); - switch (opc) { - case GETVF: return "GETVF"; - case PUTVF: return "PUTVF"; - case TAG1: return "TAG1"; - case TAG2: return "TAG2"; - case CALLM_S: return "CALLM_S"; - case CALLM_E: return "CALLM_E"; - case INCEIP: return "INCEIP"; - case LEA1: return "LEA1"; - case LEA2: return "LEA2"; - case NOP: return "NOP"; - case GET: return "GET"; - case PUT: return "PUT"; - case GETF: return "GETF"; - case PUTF: return "PUTF"; - case LOAD: return "LD" ; - case STORE: return "ST" ; - case MOV: return "MOV"; - case CMOV: return "CMOV"; - case WIDEN: return "WIDEN"; - case JMP: return "J" ; - case JIFZ: return "JIFZ" ; - case CALLM: return "CALLM"; - case PUSH: return "PUSH" ; - case POP: return "POP" ; - case CLEAR: return "CLEAR"; - case CC2VAL: return "CC2VAL"; - case FPU_R: return "FPU_R"; - case FPU_W: return "FPU_W"; - case FPU: return "FPU" ; - case LOADV: return "LOADV"; - case STOREV: return "STOREV"; - case GETV: return "GETV"; - case PUTV: return "PUTV"; - case TESTV: return "TESTV"; - case SETV: return "SETV"; - default: VG_(panic)("nameUOpcode: unhandled case"); - } -} - - -void VG_(ppUInstr) ( Int instrNo, UInstr* u ) -{ - VG_(printf)("\t%4d: %s", instrNo, - VG_(nameUOpcode)(True, u->opcode)); - if (u->opcode == JMP || u->opcode == CC2VAL) - VG_(printf)("%s", VG_(nameCondcode(u->cond))); - - switch (u->size) { - case 0: VG_(printf)("o"); break; - case 1: VG_(printf)("B"); break; - case 2: VG_(printf)("W"); break; - case 4: VG_(printf)("L"); break; - case 8: VG_(printf)("Q"); break; - default: VG_(printf)("%d", (Int)u->size); break; - } - - switch (u->opcode) { - - case TAG1: - VG_(printf)("\t"); - 
ppUOperand(u, 1, 4, False); - VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); - ppUOperand(u, 1, 4, False); - VG_(printf)(" )"); - break; - - case TAG2: - VG_(printf)("\t"); - ppUOperand(u, 2, 4, False); - VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 )); - ppUOperand(u, 1, 4, False); - VG_(printf)(", "); - ppUOperand(u, 2, 4, False); - VG_(printf)(" )"); - break; - - case CALLM_S: case CALLM_E: - break; - - case INCEIP: - VG_(printf)("\t$%d", u->val1); - break; - - case LEA2: - VG_(printf)("\t%d(" , u->lit32); - ppUOperand(u, 1, 4, False); - VG_(printf)(","); - ppUOperand(u, 2, 4, False); - VG_(printf)(",%d), ", (Int)u->extra4b); - ppUOperand(u, 3, 4, False); - break; - - case LEA1: - VG_(printf)("\t%d" , u->lit32); - ppUOperand(u, 1, 4, True); - VG_(printf)(", "); - ppUOperand(u, 2, 4, False); - break; - - case NOP: - break; - - case FPU_W: - VG_(printf)("\t0x%x:0x%x, ", - (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); - ppUOperand(u, 2, 4, True); - break; - - case FPU_R: - VG_(printf)("\t"); - ppUOperand(u, 2, 4, True); - VG_(printf)(", 0x%x:0x%x", - (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); - break; - - case FPU: - VG_(printf)("\t0x%x:0x%x", - (u->val1 >> 8) & 0xFF, u->val1 & 0xFF ); - break; - - case STOREV: case LOADV: - case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV); - VG_(printf)(", "); - ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV); - break; - - case GETF: case PUTF: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - break; - - case JMP: case CC2VAL: - case PUSH: case POP: case CLEAR: case CALLM: - if (u->opcode == JMP) { - switch (u->jmpkind) { - case JmpCall: VG_(printf)("-c"); break; - case JmpRet: VG_(printf)("-r"); break; - case JmpSyscall: VG_(printf)("-sys"); break; - case JmpClientReq: VG_(printf)("-cli"); break; - default: break; - } - } - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - break; - - case 
JIFZ: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - VG_(printf)(", "); - ppUOperand(u, 2, u->size, False); - break; - - case PUTVF: case GETVF: - VG_(printf)("\t"); - ppUOperand(u, 1, 0, False); - break; - - case NOT: case NEG: case INC: case DEC: case BSWAP: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - break; - - case ADD: case ADC: case AND: case OR: - case XOR: case SUB: case SBB: - case SHL: case SHR: case SAR: - case ROL: case ROR: case RCL: case RCR: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - VG_(printf)(", "); - ppUOperand(u, 2, u->size, False); - break; - - case GETV: case PUTV: - VG_(printf)("\t"); - ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False); - VG_(printf)(", "); - ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False); - break; - - case WIDEN: - VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)), - u->signed_widen?'s':'z'); - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - break; - - case TESTV: case SETV: - VG_(printf)("\t"); - ppUOperand(u, 1, u->size, False); - break; - - default: VG_(panic)("ppUInstr: unhandled opcode"); - } - - if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) { - VG_(printf)(" ("); - if (u->flags_r != FlagsEmpty) - vg_ppFlagSet("-r", u->flags_r); - if (u->flags_w != FlagsEmpty) - vg_ppFlagSet("-w", u->flags_w); - VG_(printf)(")"); - } - VG_(printf)("\n"); -} - - -void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title ) -{ - Int i; - VG_(printf)("\n%s\n", title); - for (i = 0; i < cb->used; i++) - if (0 || cb->instrs[i].opcode != NOP) - VG_(ppUInstr) ( i, &cb->instrs[i] ); - VG_(printf)("\n"); -} - - -/*------------------------------------------------------------*/ -/*--- uinstr helpers for register allocation ---*/ -/*--- and code improvement. ---*/ -/*------------------------------------------------------------*/ - -/* A structure for communicating temp uses, and for indicating - temp->real register mappings for patchUInstr. 
*/ -typedef - struct { - Int realNo; - Int tempNo; - Bool isWrite; - } - TempUse; - - -/* Get the temp use of a uinstr, parking them in an array supplied by - the caller, which is assumed to be big enough. Return the number - of entries. Insns which read _and_ write a register wind up - mentioning it twice. Entries are placed in the array in program - order, so that if a reg is read-modified-written, it appears first - as a read and then as a write. -*/ -static __inline__ -Int getTempUsage ( UInstr* u, TempUse* arr ) -{ - -# define RD(ono) \ - if (mycat(u->tag,ono) == TempReg) \ - { arr[n].tempNo = mycat(u->val,ono); \ - arr[n].isWrite = False; n++; } -# define WR(ono) \ - if (mycat(u->tag,ono) == TempReg) \ - { arr[n].tempNo = mycat(u->val,ono); \ - arr[n].isWrite = True; n++; } - - Int n = 0; - switch (u->opcode) { - case LEA1: RD(1); WR(2); break; - case LEA2: RD(1); RD(2); WR(3); break; - - case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break; - case FPU_R: case FPU_W: RD(2); break; - - case GETF: WR(1); break; - case PUTF: RD(1); break; - - case GET: WR(2); break; - case PUT: RD(1); break; - case LOAD: RD(1); WR(2); break; - case STORE: RD(1); RD(2); break; - case MOV: RD(1); WR(2); break; - - case JMP: RD(1); break; - case CLEAR: case CALLM: break; - - case PUSH: RD(1); break; - case POP: WR(1); break; - - case TAG2: - case CMOV: - case ADD: case ADC: case AND: case OR: - case XOR: case SUB: case SBB: - RD(1); RD(2); WR(2); break; - - case SHL: case SHR: case SAR: - case ROL: case ROR: case RCL: case RCR: - RD(1); RD(2); WR(2); break; - - case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP: - RD(1); WR(1); break; - - case WIDEN: RD(1); WR(1); break; - - case CC2VAL: WR(1); break; - case JIFZ: RD(1); break; - - /* These sizes are only ever consulted when the instrumentation - code is being added, so the following can return - manifestly-bogus sizes. 
*/ - case LOADV: RD(1); WR(2); break; - case STOREV: RD(1); RD(2); break; - case GETV: WR(2); break; - case PUTV: RD(1); break; - case TESTV: RD(1); break; - case SETV: WR(1); break; - case PUTVF: RD(1); break; - case GETVF: WR(1); break; - - default: VG_(panic)("getTempUsage: unhandled opcode"); - } - return n; - -# undef RD -# undef WR -} - - -/* Change temp regs in u into real regs, as directed by tmap. */ -static __inline__ -void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap ) -{ - Int i; - if (u->tag1 == TempReg) { - for (i = 0; i < n_tmap; i++) - if (tmap[i].tempNo == u->val1) break; - if (i == n_tmap) VG_(panic)("patchUInstr(1)"); - u->tag1 = RealReg; - u->val1 = tmap[i].realNo; - } - if (u->tag2 == TempReg) { - for (i = 0; i < n_tmap; i++) - if (tmap[i].tempNo == u->val2) break; - if (i == n_tmap) VG_(panic)("patchUInstr(2)"); - u->tag2 = RealReg; - u->val2 = tmap[i].realNo; - } - if (u->tag3 == TempReg) { - for (i = 0; i < n_tmap; i++) - if (tmap[i].tempNo == u->val3) break; - if (i == n_tmap) VG_(panic)("patchUInstr(3)"); - u->tag3 = RealReg; - u->val3 = tmap[i].realNo; - } -} - - -/* Tedious x86-specific hack which compensates for the fact that the - register numbers for %ah .. %dh do not correspond to those for %eax - .. %edx. It maps a (reg size, reg no) pair to the number of the - containing 32-bit reg. */ -static __inline__ -Int containingArchRegOf ( Int sz, Int aregno ) -{ - switch (sz) { - case 4: return aregno; - case 2: return aregno; - case 1: return aregno >= 4 ? aregno-4 : aregno; - default: VG_(panic)("containingArchRegOf"); - } -} - - -/* If u reads an ArchReg, return the number of the containing arch - reg. Otherwise return -1. Used in redundant-PUT elimination. 
*/ -static __inline__ -Int maybe_uinstrReadsArchReg ( UInstr* u ) -{ - switch (u->opcode) { - case GET: - case ADD: case ADC: case AND: case OR: - case XOR: case SUB: case SBB: - case SHL: case SHR: case SAR: case ROL: - case ROR: case RCL: case RCR: - if (u->tag1 == ArchReg) - return containingArchRegOf ( u->size, u->val1 ); - else - return -1; - - case GETF: case PUTF: - case CALLM_S: case CALLM_E: - case INCEIP: - case LEA1: - case LEA2: - case NOP: - case PUT: - case LOAD: - case STORE: - case MOV: - case CMOV: - case JMP: - case CALLM: case CLEAR: case PUSH: case POP: - case NOT: case NEG: case INC: case DEC: case BSWAP: - case CC2VAL: - case JIFZ: - case FPU: case FPU_R: case FPU_W: - case WIDEN: - return -1; - - default: - VG_(ppUInstr)(0,u); - VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode"); - } -} - -static __inline__ -Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg ) -{ - Int i, k; - TempUse tempUse[3]; - k = getTempUsage ( u, &tempUse[0] ); - for (i = 0; i < k; i++) - if (tempUse[i].tempNo == tempreg) - return True; - return False; -} - - -/*------------------------------------------------------------*/ -/*--- ucode improvement. ---*/ -/*------------------------------------------------------------*/ - -/* Improve the code in cb by doing - -- Redundant ArchReg-fetch elimination - -- Redundant PUT elimination - -- Redundant cond-code restore/save elimination - The overall effect of these is to allow target registers to be - cached in host registers over multiple target insns. -*/ -static void vg_improve ( UCodeBlock* cb ) -{ - Int i, j, k, m, n, ar, tr, told, actual_areg; - Int areg_map[8]; - Bool annul_put[8]; - TempUse tempUse[3]; - UInstr* u; - Bool wr; - Int* last_live_before; - FlagSet future_dead_flags; - - if (cb->nextTemp > 0) - last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) ); - else - last_live_before = NULL; - - - /* PASS 1: redundant GET elimination. 
(Actually, more general than - that -- eliminates redundant fetches of ArchRegs). */ - - /* Find the live-range-ends for all temporaries. Duplicates code - in the register allocator :-( */ - - for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1; - - for (i = cb->used-1; i >= 0; i--) { - u = &cb->instrs[i]; - - k = getTempUsage(u, &tempUse[0]); - - /* For each temp usage ... bwds in program order. */ - for (j = k-1; j >= 0; j--) { - tr = tempUse[j].tempNo; - wr = tempUse[j].isWrite; - if (last_live_before[tr] == -1) { - vg_assert(tr >= 0 && tr < cb->nextTemp); - last_live_before[tr] = wr ? (i+1) : i; - } - } - - } - -# define BIND_ARCH_TO_TEMP(archreg,tempreg)\ - { Int q; \ - /* Invalidate any old binding(s) to tempreg. */ \ - for (q = 0; q < 8; q++) \ - if (areg_map[q] == tempreg) areg_map[q] = -1; \ - /* Add the new binding. */ \ - areg_map[archreg] = (tempreg); \ - } - - /* Set up the A-reg map. */ - for (i = 0; i < 8; i++) areg_map[i] = -1; - - /* Scan insns. */ - for (i = 0; i < cb->used; i++) { - u = &cb->instrs[i]; - if (u->opcode == GET && u->size == 4) { - /* GET; see if it can be annulled. */ - vg_assert(u->tag1 == ArchReg); - vg_assert(u->tag2 == TempReg); - ar = u->val1; - tr = u->val2; - told = areg_map[ar]; - if (told != -1 && last_live_before[told] <= i) { - /* ar already has an old mapping to told, but that runs - out here. Annul this GET, rename tr to told for the - rest of the block, and extend told's live range to that - of tr. */ - u->opcode = NOP; - u->tag1 = u->tag2 = NoValue; - n = last_live_before[tr] + 1; - if (n > cb->used) n = cb->used; - last_live_before[told] = last_live_before[tr]; - last_live_before[tr] = i-1; - if (VG_(disassemble)) - VG_(printf)( - "at %d: delete GET, rename t%d to t%d in (%d .. 
%d)\n", - i, tr, told,i+1, n-1); - for (m = i+1; m < n; m++) { - if (cb->instrs[m].tag1 == TempReg - && cb->instrs[m].val1 == tr) - cb->instrs[m].val1 = told; - if (cb->instrs[m].tag2 == TempReg - && cb->instrs[m].val2 == tr) - cb->instrs[m].val2 = told; - } - BIND_ARCH_TO_TEMP(ar,told); - } - else - BIND_ARCH_TO_TEMP(ar,tr); - } - else if (u->opcode == GET && u->size != 4) { - /* Invalidate any mapping for this archreg. */ - actual_areg = containingArchRegOf ( u->size, u->val1 ); - areg_map[actual_areg] = -1; - } - else if (u->opcode == PUT && u->size == 4) { - /* PUT; re-establish t -> a binding */ - vg_assert(u->tag1 == TempReg); - vg_assert(u->tag2 == ArchReg); - BIND_ARCH_TO_TEMP(u->val2, u->val1); - } - else if (u->opcode == PUT && u->size != 4) { - /* Invalidate any mapping for this archreg. */ - actual_areg = containingArchRegOf ( u->size, u->val2 ); - areg_map[actual_areg] = -1; - } else { - - /* see if insn has an archreg as a read operand; if so try to - map it. */ - if (u->tag1 == ArchReg && u->size == 4 - && areg_map[u->val1] != -1) { - switch (u->opcode) { - case ADD: case SUB: case AND: case OR: case XOR: - case ADC: case SBB: - case SHL: case SHR: case SAR: case ROL: case ROR: - case RCL: case RCR: - if (VG_(disassemble)) - VG_(printf)( - "at %d: change ArchReg %S to TempReg t%d\n", - i, nameIReg(4,u->val1), areg_map[u->val1]); - u->tag1 = TempReg; - u->val1 = areg_map[u->val1]; - /* Remember to extend the live range of the TempReg, - if necessary. */ - if (last_live_before[u->val1] < i) - last_live_before[u->val1] = i; - break; - default: - break; - } - } - - /* boring insn; invalidate any mappings to temps it writes */ - k = getTempUsage(u, &tempUse[0]); - - for (j = 0; j < k; j++) { - wr = tempUse[j].isWrite; - if (!wr) continue; - tr = tempUse[j].tempNo; - for (m = 0; m < 8; m++) - if (areg_map[m] == tr) areg_map[m] = -1; - } - } - - } - -# undef BIND_ARCH_TO_TEMP - - /* PASS 2: redundant PUT elimination. 
Don't annul (delay) puts of - %ESP, since the memory check machinery always requires the - in-memory value of %ESP to be up to date. Although this isn't - actually required by other analyses (cache simulation), it's - simplest to be consistent for all end-uses. */ - for (j = 0; j < 8; j++) - annul_put[j] = False; - - for (i = cb->used-1; i >= 0; i--) { - u = &cb->instrs[i]; - if (u->opcode == NOP) continue; - - if (u->opcode == PUT && u->size == 4) { - vg_assert(u->tag2 == ArchReg); - actual_areg = containingArchRegOf ( 4, u->val2 ); - if (annul_put[actual_areg]) { - vg_assert(actual_areg != R_ESP); - u->opcode = NOP; - u->tag1 = u->tag2 = NoValue; - if (VG_(disassemble)) - VG_(printf)("at %d: delete PUT\n", i ); - } else { - if (actual_areg != R_ESP) - annul_put[actual_areg] = True; - } - } - else if (u->opcode == PUT && u->size != 4) { - actual_areg = containingArchRegOf ( u->size, u->val2 ); - annul_put[actual_areg] = False; - } - else if (u->opcode == JMP || u->opcode == JIFZ - || u->opcode == CALLM) { - for (j = 0; j < 8; j++) - annul_put[j] = False; - } - else { - /* If an instruction reads an ArchReg, the immediately - preceding PUT cannot be annulled. */ - actual_areg = maybe_uinstrReadsArchReg ( u ); - if (actual_areg != -1) - annul_put[actual_areg] = False; - } - } - - /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is - dead after this point, annul the MOV insn and rename t2 to t1. - Further modifies the last_live_before map. */ - -# if 0 - VG_(ppUCodeBlock)(cb, "Before MOV elimination" ); - for (i = 0; i < cb->nextTemp; i++) - VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]); - VG_(printf)("\n"); -# endif - - for (i = 0; i < cb->used-1; i++) { - u = &cb->instrs[i]; - if (u->opcode != MOV) continue; - if (u->tag1 == Literal) continue; - vg_assert(u->tag1 == TempReg); - vg_assert(u->tag2 == TempReg); - if (last_live_before[u->val1] == i) { - if (VG_(disassemble)) - VG_(printf)( - "at %d: delete MOV, rename t%d to t%d in (%d .. 
%d)\n", - i, u->val2, u->val1, i+1, last_live_before[u->val2] ); - for (j = i+1; j <= last_live_before[u->val2]; j++) { - if (cb->instrs[j].tag1 == TempReg - && cb->instrs[j].val1 == u->val2) - cb->instrs[j].val1 = u->val1; - if (cb->instrs[j].tag2 == TempReg - && cb->instrs[j].val2 == u->val2) - cb->instrs[j].val2 = u->val1; - } - last_live_before[u->val1] = last_live_before[u->val2]; - last_live_before[u->val2] = i-1; - u->opcode = NOP; - u->tag1 = u->tag2 = NoValue; - } - } - - /* PASS 3: redundant condition-code restore/save elimination. - Scan backwards from the end. future_dead_flags records the set - of flags which are dead at this point, that is, will be written - before they are next read. Earlier uinsns which write flags - already in future_dead_flags can have their writes annulled. - */ - future_dead_flags = FlagsEmpty; - - for (i = cb->used-1; i >= 0; i--) { - u = &cb->instrs[i]; - - /* We might never make it to insns beyond this one, so be - conservative. */ - if (u->opcode == JIFZ || u->opcode == JMP) { - future_dead_flags = FlagsEmpty; - continue; - } - - /* PUTF modifies the %EFLAGS in essentially unpredictable ways. - For example people try to mess with bit 21 to see if CPUID - works. The setting may or may not actually take hold. So we - play safe here. */ - if (u->opcode == PUTF) { - future_dead_flags = FlagsEmpty; - continue; - } - - /* We can annul the flags written by this insn if it writes a - subset (or eq) of the set of flags known to be dead after - this insn. 
If not, just record the flags also written by - this insn.*/ - if (u->flags_w != FlagsEmpty - && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) { - if (VG_(disassemble)) { - VG_(printf)("at %d: annul flag write ", i); - vg_ppFlagSet("", u->flags_w); - VG_(printf)(" due to later "); - vg_ppFlagSet("", future_dead_flags); - VG_(printf)("\n"); - } - u->flags_w = FlagsEmpty; - } else { - future_dead_flags - = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags ); - } - - /* If this insn also reads flags, empty out future_dead_flags so - as to force preceding writes not to be annulled. */ - if (u->flags_r != FlagsEmpty) - future_dead_flags = FlagsEmpty; - } - - if (last_live_before) - VG_(jitfree) ( last_live_before ); -} - - -/*------------------------------------------------------------*/ -/*--- The new register allocator. ---*/ -/*------------------------------------------------------------*/ - -typedef - struct { - /* Becomes live for the first time after this insn ... */ - Int live_after; - /* Becomes dead for the last time after this insn ... */ - Int dead_before; - /* The "home" spill slot, if needed. Never changes. */ - Int spill_no; - /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */ - Int real_no; - } - TempInfo; - - -/* Take a ucode block and allocate its TempRegs to RealRegs, or put - them in spill locations, and add spill code, if there are not - enough real regs. The usual register allocation deal, in short. - - Important redundancy of representation: - - real_to_temp maps real reg ranks (RRRs) to TempReg nos, or - to VG_NOVALUE if the real reg has no currently assigned TempReg. - - The .real_no field of a TempInfo gives the current RRR for - this TempReg, or VG_NOVALUE if the TempReg is currently - in memory, in which case it is in the SpillNo denoted by - spillno. - - These pieces of information (a fwds-bwds mapping, really) must - be kept consistent! 
- - This allocator uses the so-called Second Chance Bin Packing - algorithm, as described in "Quality and Speed in Linear-scan - Register Allocation" (Traub, Holloway and Smith, ACM PLDI98, - pp142-151). It is simple and fast and remarkably good at - minimising the amount of spill code introduced. -*/ - -static -UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 ) -{ - TempInfo* temp_info; - Int real_to_temp[VG_MAX_REALREGS]; - Bool is_spill_cand[VG_MAX_REALREGS]; - Int ss_busy_until_before[VG_MAX_SPILLSLOTS]; - Int i, j, k, m, r, tno, max_ss_no; - Bool wr, defer, isRead, spill_reqd; - TempUse tempUse[3]; - UCodeBlock* c2; - - /* Used to denote ... well, "no value" in this fn. */ -# define VG_NOTHING (-2) - - /* Initialise the TempReg info. */ - if (c1->nextTemp > 0) - temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) ); - else - temp_info = NULL; - - for (i = 0; i < c1->nextTemp; i++) { - temp_info[i].live_after = VG_NOTHING; - temp_info[i].dead_before = VG_NOTHING; - temp_info[i].spill_no = VG_NOTHING; - /* temp_info[i].real_no is not yet relevant. */ - } - - spill_reqd = False; - - /* Scan fwds to establish live ranges. */ - - for (i = 0; i < c1->used; i++) { - k = getTempUsage(&c1->instrs[i], &tempUse[0]); - vg_assert(k >= 0 && k <= 3); - - /* For each temp usage ... fwds in program order */ - for (j = 0; j < k; j++) { - tno = tempUse[j].tempNo; - wr = tempUse[j].isWrite; - if (wr) { - /* Writes hold a reg live until after this insn. */ - if (temp_info[tno].live_after == VG_NOTHING) - temp_info[tno].live_after = i; - if (temp_info[tno].dead_before < i + 1) - temp_info[tno].dead_before = i + 1; - } else { - /* First use of a tmp should be a write. */ - vg_assert(temp_info[tno].live_after != VG_NOTHING); - /* Reads only hold it live until before this insn. */ - if (temp_info[tno].dead_before < i) - temp_info[tno].dead_before = i; - } - } - } - -# if 0 - /* Sanity check on live ranges. Expensive but correct. 
*/ - for (i = 0; i < c1->nextTemp; i++) { - vg_assert( (temp_info[i].live_after == VG_NOTHING - && temp_info[i].dead_before == VG_NOTHING) - || (temp_info[i].live_after != VG_NOTHING - && temp_info[i].dead_before != VG_NOTHING) ); - } -# endif - - /* Do a rank-based allocation of TempRegs to spill slot numbers. - We put as few as possible values in spill slots, but - nevertheless need to have an assignment to them just in case. */ - - max_ss_no = -1; - - for (i = 0; i < VG_MAX_SPILLSLOTS; i++) - ss_busy_until_before[i] = 0; - - for (i = 0; i < c1->nextTemp; i++) { - - /* True iff this temp is unused. */ - if (temp_info[i].live_after == VG_NOTHING) - continue; - - /* Find the lowest-numbered spill slot which is available at the - start point of this interval, and assign the interval to - it. */ - for (j = 0; j < VG_MAX_SPILLSLOTS; j++) - if (ss_busy_until_before[j] <= temp_info[i].live_after) - break; - if (j == VG_MAX_SPILLSLOTS) { - VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n"); - VG_(panic)("register allocation failed -- out of spill slots"); - } - ss_busy_until_before[j] = temp_info[i].dead_before; - temp_info[i].spill_no = j; - if (j > max_ss_no) - max_ss_no = j; - } - - VG_(total_reg_rank) += (max_ss_no+1); - - /* Show live ranges and assigned spill slot nos. */ - - if (VG_(disassemble)) { - VG_(printf)("Live Range Assignments\n"); - - for (i = 0; i < c1->nextTemp; i++) { - if (temp_info[i].live_after == VG_NOTHING) - continue; - VG_(printf)( - " LR %d is after %d to before %d spillno %d\n", - i, - temp_info[i].live_after, - temp_info[i].dead_before, - temp_info[i].spill_no - ); - } - } - - /* Now that we've established a spill slot number for each used - temporary, we can go ahead and do the core of the "Second-chance - binpacking" allocation algorithm. */ - - /* Resulting code goes here. We generate it all in a forwards - pass. */ - c2 = VG_(allocCodeBlock)(); - - /* At the start, no TempRegs are assigned to any real register. 
- Correspondingly, all temps claim to be currently resident in - their spill slots, as computed by the previous two passes. */ - for (i = 0; i < VG_MAX_REALREGS; i++) - real_to_temp[i] = VG_NOTHING; - for (i = 0; i < c1->nextTemp; i++) - temp_info[i].real_no = VG_NOTHING; - - if (VG_(disassemble)) - VG_(printf)("\n"); - - /* Process each insn in turn. */ - for (i = 0; i < c1->used; i++) { - - if (c1->instrs[i].opcode == NOP) continue; - VG_(uinstrs_prealloc)++; - -# if 0 - /* Check map consistency. Expensive but correct. */ - for (r = 0; r < VG_MAX_REALREGS; r++) { - if (real_to_temp[r] != VG_NOTHING) { - tno = real_to_temp[r]; - vg_assert(tno >= 0 && tno < c1->nextTemp); - vg_assert(temp_info[tno].real_no == r); - } - } - for (tno = 0; tno < c1->nextTemp; tno++) { - if (temp_info[tno].real_no != VG_NOTHING) { - r = temp_info[tno].real_no; - vg_assert(r >= 0 && r < VG_MAX_REALREGS); - vg_assert(real_to_temp[r] == tno); - } - } -# endif - - if (VG_(disassemble)) - VG_(ppUInstr)(i, &c1->instrs[i]); - - /* First, free up enough real regs for this insn. This may - generate spill stores since we may have to evict some TempRegs - currently in real regs. Also generates spill loads. */ - - k = getTempUsage(&c1->instrs[i], &tempUse[0]); - vg_assert(k >= 0 && k <= 3); - - /* For each ***different*** temp mentioned in the insn .... */ - for (j = 0; j < k; j++) { - - /* First check if the temp is mentioned again later; if so, - ignore this mention. We only want to process each temp - used by the insn once, even if it is mentioned more than - once. */ - defer = False; - tno = tempUse[j].tempNo; - for (m = j+1; m < k; m++) - if (tempUse[m].tempNo == tno) - defer = True; - if (defer) - continue; - - /* Now we're trying to find a register for tempUse[j].tempNo. - First of all, if it already has a register assigned, we - don't need to do anything more. */ - if (temp_info[tno].real_no != VG_NOTHING) - continue; - - /* No luck. 
The next thing to do is see if there is a - currently unassigned register available. If so, bag it. */ - for (r = 0; r < VG_MAX_REALREGS; r++) { - if (real_to_temp[r] == VG_NOTHING) - break; - } - if (r < VG_MAX_REALREGS) { - real_to_temp[r] = tno; - temp_info[tno].real_no = r; - continue; - } - - /* Unfortunately, that didn't pan out either. So we'll have - to eject some other unfortunate TempReg into a spill slot - in order to free up a register. Of course, we need to be - careful not to eject some other TempReg needed by this - insn. - - Select r in 0 .. VG_MAX_REALREGS-1 such that - real_to_temp[r] is not mentioned in - tempUse[0 .. k-1].tempNo, since it would be just plain - wrong to eject some other TempReg which we need to use in - this insn. - - It is here that it is important to make a good choice of - register to spill. */ - - /* First, mark those regs which are not spill candidates. */ - for (r = 0; r < VG_MAX_REALREGS; r++) { - is_spill_cand[r] = True; - for (m = 0; m < k; m++) { - if (real_to_temp[r] == tempUse[m].tempNo) { - is_spill_cand[r] = False; - break; - } - } - } - - /* We can choose any r satisfying is_spill_cand[r]. However, - try to make a good choice. First, try and find r such - that the associated TempReg is already dead. */ - for (r = 0; r < VG_MAX_REALREGS; r++) { - if (is_spill_cand[r] && - temp_info[real_to_temp[r]].dead_before <= i) - goto have_spill_cand; - } - - /* No spill cand is mapped to a dead TempReg. Now we really - _do_ have to generate spill code. Choose r so that the - next use of its associated TempReg is as far ahead as - possible, in the hope that this will minimise the number of - consequent reloads required. This is a bit expensive, but - we don't have to do it very often. 
*/ - { - Int furthest_r = VG_MAX_REALREGS; - Int furthest = 0; - for (r = 0; r < VG_MAX_REALREGS; r++) { - if (!is_spill_cand[r]) continue; - for (m = i+1; m < c1->used; m++) - if (uInstrMentionsTempReg(&c1->instrs[m], - real_to_temp[r])) - break; - if (m > furthest) { - furthest = m; - furthest_r = r; - } - } - r = furthest_r; - goto have_spill_cand; - } - - have_spill_cand: - if (r == VG_MAX_REALREGS) - VG_(panic)("new reg alloc: out of registers ?!"); - - /* Eject r. Important refinement: don't bother if the - associated TempReg is now dead. */ - vg_assert(real_to_temp[r] != VG_NOTHING); - vg_assert(real_to_temp[r] != tno); - temp_info[real_to_temp[r]].real_no = VG_NOTHING; - if (temp_info[real_to_temp[r]].dead_before > i) { - uInstr2(c2, PUT, 4, - RealReg, VG_(rankToRealRegNo)(r), - SpillNo, temp_info[real_to_temp[r]].spill_no); - VG_(uinstrs_spill)++; - spill_reqd = True; - if (VG_(disassemble)) - VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); - } - - /* Decide if tno is read. */ - isRead = False; - for (m = 0; m < k; m++) - if (tempUse[m].tempNo == tno && !tempUse[m].isWrite) - isRead = True; - - /* If so, generate a spill load. */ - if (isRead) { - uInstr2(c2, GET, 4, - SpillNo, temp_info[tno].spill_no, - RealReg, VG_(rankToRealRegNo)(r) ); - VG_(uinstrs_spill)++; - spill_reqd = True; - if (VG_(disassemble)) - VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); - } - - /* Update the forwards and backwards maps. */ - real_to_temp[r] = tno; - temp_info[tno].real_no = r; - } - - /* By this point, all TempRegs mentioned by the insn have been - bought into real regs. We now copy the insn to the output - and use patchUInstr to convert its rTempRegs into - realregs. 
*/ - for (j = 0; j < k; j++) - tempUse[j].realNo - = VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no); - VG_(copyUInstr)(c2, &c1->instrs[i]); - patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k); - - if (VG_(disassemble)) { - VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2)); - VG_(printf)("\n"); - } - } - - if (temp_info != NULL) - VG_(jitfree)(temp_info); - - VG_(freeCodeBlock)(c1); - - if (spill_reqd) - VG_(translations_needing_spill)++; - - return c2; - -# undef VG_NOTHING - -} - - -/*------------------------------------------------------------*/ -/*--- New instrumentation machinery. ---*/ -/*------------------------------------------------------------*/ - -static -VgTagOp get_VgT_ImproveOR_TQ ( Int sz ) -{ - switch (sz) { - case 4: return VgT_ImproveOR4_TQ; - case 2: return VgT_ImproveOR2_TQ; - case 1: return VgT_ImproveOR1_TQ; - default: VG_(panic)("get_VgT_ImproveOR_TQ"); - } -} - - -static -VgTagOp get_VgT_ImproveAND_TQ ( Int sz ) -{ - switch (sz) { - case 4: return VgT_ImproveAND4_TQ; - case 2: return VgT_ImproveAND2_TQ; - case 1: return VgT_ImproveAND1_TQ; - default: VG_(panic)("get_VgT_ImproveAND_TQ"); - } -} - - -static -VgTagOp get_VgT_Left ( Int sz ) -{ - switch (sz) { - case 4: return VgT_Left4; - case 2: return VgT_Left2; - case 1: return VgT_Left1; - default: VG_(panic)("get_VgT_Left"); - } -} - - -static -VgTagOp get_VgT_UifU ( Int sz ) -{ - switch (sz) { - case 4: return VgT_UifU4; - case 2: return VgT_UifU2; - case 1: return VgT_UifU1; - case 0: return VgT_UifU0; - default: VG_(panic)("get_VgT_UifU"); - } -} - - -static -VgTagOp get_VgT_DifD ( Int sz ) -{ - switch (sz) { - case 4: return VgT_DifD4; - case 2: return VgT_DifD2; - case 1: return VgT_DifD1; - default: VG_(panic)("get_VgT_DifD"); - } -} - - -static -VgTagOp get_VgT_PCast ( Int szs, Int szd ) -{ - if (szs == 4 && szd == 0) return VgT_PCast40; - if (szs == 2 && szd == 0) return VgT_PCast20; - if (szs == 1 && szd == 0) return VgT_PCast10; - if (szs == 0 && szd == 1) return VgT_PCast01; - 
if (szs == 0 && szd == 2) return VgT_PCast02; - if (szs == 0 && szd == 4) return VgT_PCast04; - if (szs == 1 && szd == 4) return VgT_PCast14; - if (szs == 1 && szd == 2) return VgT_PCast12; - if (szs == 1 && szd == 1) return VgT_PCast11; - VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd); - VG_(panic)("get_VgT_PCast"); -} - - -static -VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd ) -{ - if (szs == 1 && szd == 2 && syned) return VgT_SWiden12; - if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12; - - if (szs == 1 && szd == 4 && syned) return VgT_SWiden14; - if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14; - - if (szs == 2 && szd == 4 && syned) return VgT_SWiden24; - if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24; - - VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd); - VG_(panic)("get_VgT_Widen"); -} - -/* Pessimally cast the spec'd shadow from one size to another. */ -static -void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg ) -{ - if (szs == 0 && szd == 0) - return; - uInstr3(cb, TAG1, 0, TempReg, tempreg, - NoValue, 0, - Lit16, get_VgT_PCast(szs,szd)); -} - - -/* Create a signed or unsigned widen of the spec'd shadow from one - size to another. The only allowed size transitions are 1->2, 1->4 - and 2->4. */ -static -void create_Widen ( UCodeBlock* cb, Bool signed_widen, - Int szs, Int szd, Int tempreg ) -{ - if (szs == szd) return; - uInstr3(cb, TAG1, 0, TempReg, tempreg, - NoValue, 0, - Lit16, get_VgT_Widen(signed_widen,szs,szd)); -} - - -/* Get the condition codes into a new shadow, at the given size. */ -static -Int create_GETVF ( UCodeBlock* cb, Int sz ) -{ - Int tt = newShadow(cb); - uInstr1(cb, GETVF, 0, TempReg, tt); - create_PCast(cb, 0, sz, tt); - return tt; -} - - -/* Save the condition codes from the spec'd shadow. 
*/ -static -void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg ) -{ - if (sz == 0) { - uInstr1(cb, PUTVF, 0, TempReg, tempreg); - } else { - Int tt = newShadow(cb); - uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt); - create_PCast(cb, sz, 0, tt); - uInstr1(cb, PUTVF, 0, TempReg, tt); - } -} - - -/* Do Left on the spec'd shadow. */ -static -void create_Left ( UCodeBlock* cb, Int sz, Int tempreg ) -{ - uInstr3(cb, TAG1, 0, - TempReg, tempreg, - NoValue, 0, - Lit16, get_VgT_Left(sz)); -} - - -/* Do UifU on ts and td, putting the result in td. */ -static -void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td ) -{ - uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td, - Lit16, get_VgT_UifU(sz)); -} - - -/* Do DifD on ts and td, putting the result in td. */ -static -void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td ) -{ - uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td, - Lit16, get_VgT_DifD(sz)); -} - - -/* Do HelpAND on value tval and tag tqqq, putting the result in - tqqq. */ -static -void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq ) -{ - uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq, - Lit16, get_VgT_ImproveAND_TQ(sz)); -} - - -/* Do HelpOR on value tval and tag tqqq, putting the result in - tqqq. */ -static -void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq ) -{ - uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq, - Lit16, get_VgT_ImproveOR_TQ(sz)); -} - - -/* Get the shadow for an operand described by (tag, val). Emit code - to do this and return the identity of the shadow holding the - result. 
The result tag is always copied into a new shadow, so it - can be modified without trashing the original.*/ -static -Int /* TempReg */ getOperandShadow ( UCodeBlock* cb, - Int sz, Int tag, Int val ) -{ - Int sh; - sh = newShadow(cb); - if (tag == TempReg) { - uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh); - return sh; - } - if (tag == Literal) { - uInstr1(cb, SETV, sz, TempReg, sh); - return sh; - } - if (tag == ArchReg) { - uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh); - return sh; - } - VG_(panic)("getOperandShadow"); -} - - - -/* Create and return an instrumented version of cb_in. Free cb_in - before returning. */ -static UCodeBlock* vg_instrument ( UCodeBlock* cb_in ) -{ - UCodeBlock* cb; - Int i, j; - UInstr* u_in; - Int qs, qd, qt, qtt; - cb = VG_(allocCodeBlock)(); - cb->nextTemp = cb_in->nextTemp; - - for (i = 0; i < cb_in->used; i++) { - qs = qd = qt = qtt = INVALID_TEMPREG; - u_in = &cb_in->instrs[i]; - - /* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */ - - /* VG_(ppUInstr)(0, u_in); */ - switch (u_in->opcode) { - - case NOP: - break; - - case INCEIP: - VG_(copyUInstr)(cb, u_in); - break; - - /* Loads and stores. Test the V bits for the address. 24 - Mar 02: since the address is A-checked anyway, there's not - really much point in doing the V-check too, unless you - think that you might use addresses which are undefined but - still addressible. Hence the optionalisation of the V - check. - - The LOADV/STOREV does an addressibility check for the - address. 
*/ - - case LOAD: - if (VG_(clo_check_addrVs)) { - uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); - uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); - } - uInstr2(cb, LOADV, u_in->size, - TempReg, u_in->val1, - TempReg, SHADOW(u_in->val2)); - VG_(copyUInstr)(cb, u_in); - break; - case STORE: - if (VG_(clo_check_addrVs)) { - uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); - uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); - } - uInstr2(cb, STOREV, u_in->size, - TempReg, SHADOW(u_in->val1), - TempReg, u_in->val2); - VG_(copyUInstr)(cb, u_in); - break; - - /* Moving stuff around. Make the V bits follow accordingly, - but don't do anything else. */ - - case GET: - uInstr2(cb, GETV, u_in->size, - ArchReg, u_in->val1, - TempReg, SHADOW(u_in->val2)); - VG_(copyUInstr)(cb, u_in); - break; - case PUT: - uInstr2(cb, PUTV, u_in->size, - TempReg, SHADOW(u_in->val1), - ArchReg, u_in->val2); - VG_(copyUInstr)(cb, u_in); - break; - - case GETF: - /* This is not the smartest way to do it, but should work. */ - qd = create_GETVF(cb, u_in->size); - uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - case PUTF: - create_PUTVF(cb, u_in->size, SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - - case MOV: - switch (u_in->tag1) { - case TempReg: - uInstr2(cb, MOV, 4, - TempReg, SHADOW(u_in->val1), - TempReg, SHADOW(u_in->val2)); - break; - case Literal: - uInstr1(cb, SETV, u_in->size, - TempReg, SHADOW(u_in->val2)); - break; - default: - VG_(panic)("vg_instrument: MOV"); - } - VG_(copyUInstr)(cb, u_in); - break; - - /* Special case of add, where one of the operands is a literal. - lea1(t) = t + some literal. - Therefore: lea1#(qa) = left(qa) - */ - case LEA1: - vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); - qs = SHADOW(u_in->val1); - qd = SHADOW(u_in->val2); - uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd); - create_Left(cb, u_in->size, qd); - VG_(copyUInstr)(cb, u_in); - break; - - /* Another form of add. 
- lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal - and is 0,1,2 or 3. - lea2#(qs,qt) = left(qs `UifU` (qt << shift)). - Note, subtly, that the shift puts zeroes at the bottom of qt, - meaning Valid, since the corresponding shift of tt puts - zeroes at the bottom of tb. - */ - case LEA2: { - Int shift; - vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in)); - switch (u_in->extra4b) { - case 1: shift = 0; break; - case 2: shift = 1; break; - case 4: shift = 2; break; - case 8: shift = 3; break; - default: VG_(panic)( "vg_instrument(LEA2)" ); - } - qs = SHADOW(u_in->val1); - qt = SHADOW(u_in->val2); - qd = SHADOW(u_in->val3); - uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd); - if (shift > 0) { - uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd); - uLiteral(cb, shift); - } - create_UifU(cb, 4, qs, qd); - create_Left(cb, u_in->size, qd); - VG_(copyUInstr)(cb, u_in); - break; - } - - /* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */ - case INC: case DEC: - qd = SHADOW(u_in->val1); - create_Left(cb, u_in->size, qd); - if (u_in->flags_w != FlagsEmpty) - create_PUTVF(cb, u_in->size, qd); - VG_(copyUInstr)(cb, u_in); - break; - - /* This is a HACK (approximation :-) */ - /* rcl#/rcr#(qs,qd) - = let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags# - eflags# = q0 - qd =pcast-0-sz(q0) - Ie, cast everything down to a single bit, then back up. - This assumes that any bad bits infect the whole word and - the eflags. - */ - case RCL: case RCR: - vg_assert(u_in->flags_r != FlagsEmpty); - /* The following assertion looks like it makes sense, but is - actually wrong. Consider this: - rcll %eax - imull %eax, %eax - The rcll writes O and C but so does the imull, so the O and C - write of the rcll is annulled by the prior improvement pass. - Noticed by Kevin Ryde - */ - /* vg_assert(u_in->flags_w != FlagsEmpty); */ - qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); - /* We can safely modify qs; cast it to 0-size. 
*/ - create_PCast(cb, u_in->size, 0, qs); - qd = SHADOW(u_in->val2); - create_PCast(cb, u_in->size, 0, qd); - /* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */ - create_UifU(cb, 0, qs, qd); - /* qs is now free; reuse it for the flag definedness. */ - qs = create_GETVF(cb, 0); - create_UifU(cb, 0, qs, qd); - create_PUTVF(cb, 0, qd); - create_PCast(cb, 0, u_in->size, qd); - VG_(copyUInstr)(cb, u_in); - break; - - /* for OP in shl shr sar rol ror - (qs is shift count#, qd is value to be OP#d) - OP(ts,td) - OP#(qs,qd) - = pcast-1-sz(qs) `UifU` OP(ts,qd) - So we apply OP to the tag bits too, and then UifU with - the shift count# to take account of the possibility of it - being undefined. - - A bit subtle: - ROL/ROR rearrange the tag bits as per the value bits. - SHL/SHR shifts zeroes into the value, and corresponding - zeroes indicating Definedness into the tag. - SAR copies the top bit of the value downwards, and therefore - SAR also copies the definedness of the top bit too. - So in all five cases, we just apply the same op to the tag - bits as is applied to the value bits. Neat! - */ - case SHL: - case SHR: case SAR: - case ROL: case ROR: { - Int t_amount = INVALID_TEMPREG; - vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal); - vg_assert(u_in->tag2 == TempReg); - qd = SHADOW(u_in->val2); - - /* Make qs hold shift-count# and make - t_amount be a TempReg holding the shift count. */ - if (u_in->tag1 == Literal) { - t_amount = newTemp(cb); - uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount); - uLiteral(cb, u_in->lit32); - qs = SHADOW(t_amount); - uInstr1(cb, SETV, 1, TempReg, qs); - } else { - t_amount = u_in->val1; - qs = SHADOW(u_in->val1); - } - - uInstr2(cb, u_in->opcode, - u_in->size, - TempReg, t_amount, - TempReg, qd); - qt = newShadow(cb); - uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); - create_PCast(cb, 1, u_in->size, qt); - create_UifU(cb, u_in->size, qt, qd); - VG_(copyUInstr)(cb, u_in); - break; - } - - /* One simple tag operation. 
*/ - case WIDEN: - vg_assert(u_in->tag1 == TempReg); - create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size, - SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - - /* not#(x) = x (since bitwise independent) */ - case NOT: - vg_assert(u_in->tag1 == TempReg); - VG_(copyUInstr)(cb, u_in); - break; - - /* neg#(x) = left(x) (derivable from case for SUB) */ - case NEG: - vg_assert(u_in->tag1 == TempReg); - create_Left(cb, u_in->size, SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - - /* bswap#(x) = bswap(x) */ - case BSWAP: - vg_assert(u_in->tag1 == TempReg); - vg_assert(u_in->size == 4); - qd = SHADOW(u_in->val1); - uInstr1(cb, BSWAP, 4, TempReg, qd); - VG_(copyUInstr)(cb, u_in); - break; - - /* cc2val#(qd) = pcast-0-to-size(eflags#) */ - case CC2VAL: - vg_assert(u_in->tag1 == TempReg); - vg_assert(u_in->flags_r != FlagsEmpty); - qt = create_GETVF(cb, u_in->size); - uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - - /* cmov#(qs,qd) = cmov(qs,qd) - That is, do the cmov of tags using the same flags as for - the data (obviously). However, first do a test on the - validity of the flags. - */ - case CMOV: - vg_assert(u_in->size == 4); - vg_assert(u_in->tag1 == TempReg); - vg_assert(u_in->tag2 == TempReg); - vg_assert(u_in->flags_r != FlagsEmpty); - vg_assert(u_in->flags_w == FlagsEmpty); - qs = SHADOW(u_in->val1); - qd = SHADOW(u_in->val2); - qt = create_GETVF(cb, 0); - uInstr1(cb, TESTV, 0, TempReg, qt); - /* qt should never be referred to again. Nevertheless - ... */ - uInstr1(cb, SETV, 0, TempReg, qt); - - uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd); - LAST_UINSTR(cb).cond = u_in->cond; - LAST_UINSTR(cb).flags_r = u_in->flags_r; - - VG_(copyUInstr)(cb, u_in); - break; - - /* add#/sub#(qs,qd) - = qs `UifU` qd `UifU` left(qs) `UifU` left(qd) - = left(qs) `UifU` left(qd) - = left(qs `UifU` qd) - adc#/sbb#(qs,qd) - = left(qs `UifU` qd) `UifU` pcast(eflags#) - Second arg (dest) is TempReg. 
- First arg (src) is Literal or TempReg or ArchReg. - */ - case ADD: case SUB: - case ADC: case SBB: - qd = SHADOW(u_in->val2); - qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); - create_UifU(cb, u_in->size, qs, qd); - create_Left(cb, u_in->size, qd); - if (u_in->opcode == ADC || u_in->opcode == SBB) { - vg_assert(u_in->flags_r != FlagsEmpty); - qt = create_GETVF(cb, u_in->size); - create_UifU(cb, u_in->size, qt, qd); - } - if (u_in->flags_w != FlagsEmpty) { - create_PUTVF(cb, u_in->size, qd); - } - VG_(copyUInstr)(cb, u_in); - break; - - /* xor#(qs,qd) = qs `UifU` qd */ - case XOR: - qd = SHADOW(u_in->val2); - qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1); - create_UifU(cb, u_in->size, qs, qd); - if (u_in->flags_w != FlagsEmpty) { - create_PUTVF(cb, u_in->size, qd); - } - VG_(copyUInstr)(cb, u_in); - break; - - /* and#/or#(qs,qd) - = (qs `UifU` qd) `DifD` improve(vs,qs) - `DifD` improve(vd,qd) - where improve is the relevant one of - Improve{AND,OR}_TQ - Use the following steps, with qt as a temp: - qt = improve(vd,qd) - qd = qs `UifU` qd - qd = qt `DifD` qd - qt = improve(vs,qs) - qd = qt `DifD` qd - */ - case AND: case OR: - vg_assert(u_in->tag1 == TempReg); - vg_assert(u_in->tag2 == TempReg); - qd = SHADOW(u_in->val2); - qs = SHADOW(u_in->val1); - qt = newShadow(cb); - - /* qt = improve(vd,qd) */ - uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt); - if (u_in->opcode == AND) - create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt); - else - create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt); - /* qd = qs `UifU` qd */ - create_UifU(cb, u_in->size, qs, qd); - /* qd = qt `DifD` qd */ - create_DifD(cb, u_in->size, qt, qd); - /* qt = improve(vs,qs) */ - uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt); - if (u_in->opcode == AND) - create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt); - else - create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt); - /* qd = qt `DifD` qd */ - create_DifD(cb, u_in->size, qt, qd); - /* So, finally qd is the result 
tag. */ - if (u_in->flags_w != FlagsEmpty) { - create_PUTVF(cb, u_in->size, qd); - } - VG_(copyUInstr)(cb, u_in); - break; - - /* Machinery to do with supporting CALLM. Copy the start and - end markers only to make the result easier to read - (debug); they generate no code and have no effect. - */ - case CALLM_S: case CALLM_E: - VG_(copyUInstr)(cb, u_in); - break; - - /* Copy PUSH and POP verbatim. Arg/result absval - calculations are done when the associated CALL is - processed. CLEAR has no effect on absval calculations but - needs to be copied. - */ - case PUSH: case POP: case CLEAR: - VG_(copyUInstr)(cb, u_in); - break; - - /* In short: - callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#) - We have to decide on a size to do the computation at, - although the choice doesn't affect correctness. We will - do a pcast to the final size anyway, so the only important - factor is to choose a size which minimises the total - number of casts needed. Valgrind: just use size 0, - regardless. It may not be very good for performance - but does simplify matters, mainly by reducing the number - of different pessimising casts which have to be implemented. - */ - case CALLM: { - UInstr* uu; - Bool res_used; - - /* Now generate the code. Get the final result absval - into qt. */ - qt = newShadow(cb); - qtt = newShadow(cb); - uInstr1(cb, SETV, 0, TempReg, qt); - for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) { - uu = & cb_in->instrs[j]; - if (uu->opcode != PUSH) continue; - /* cast via a temporary */ - uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1), - TempReg, qtt); - create_PCast(cb, uu->size, 0, qtt); - create_UifU(cb, 0, qtt, qt); - } - /* Remembering also that flags read count as inputs. */ - if (u_in->flags_r != FlagsEmpty) { - qtt = create_GETVF(cb, 0); - create_UifU(cb, 0, qtt, qt); - } - - /* qt now holds the result tag. 
If any results from the - call are used, either by fetching with POP or - implicitly by writing the flags, we copy the result - absval to the relevant location. If not used, the call - must have been for its side effects, so we test qt here - and now. Note that this assumes that all values - removed by POP continue to be live. So dead args - *must* be removed with CLEAR, not by POPping them into - a dummy tempreg. - */ - res_used = False; - for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) { - uu = & cb_in->instrs[j]; - if (uu->opcode != POP) continue; - /* Cast via a temp. */ - uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt); - create_PCast(cb, 0, uu->size, qtt); - uInstr2(cb, MOV, 4, TempReg, qtt, - TempReg, SHADOW(uu->val1)); - res_used = True; - } - if (u_in->flags_w != FlagsEmpty) { - create_PUTVF(cb, 0, qt); - res_used = True; - } - if (!res_used) { - uInstr1(cb, TESTV, 0, TempReg, qt); - /* qt should never be referred to again. Nevertheless - ... */ - uInstr1(cb, SETV, 0, TempReg, qt); - } - VG_(copyUInstr)(cb, u_in); - break; - } - /* Whew ... */ - - case JMP: - if (u_in->tag1 == TempReg) { - uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); - uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); - } else { - vg_assert(u_in->tag1 == Literal); - } - if (u_in->cond != CondAlways) { - vg_assert(u_in->flags_r != FlagsEmpty); - qt = create_GETVF(cb, 0); - uInstr1(cb, TESTV, 0, TempReg, qt); - /* qt should never be referred to again. Nevertheless - ... */ - uInstr1(cb, SETV, 0, TempReg, qt); - } - VG_(copyUInstr)(cb, u_in); - break; - - case JIFZ: - uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1)); - uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1)); - VG_(copyUInstr)(cb, u_in); - break; - - /* Emit a check on the address used. For FPU_R, the value - loaded into the FPU is checked at the time it is read from - memory (see synth_fpu_mem_check_actions). 
*/ - case FPU_R: case FPU_W: - vg_assert(u_in->tag2 == TempReg); - uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2)); - uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2)); - VG_(copyUInstr)(cb, u_in); - break; - - /* For FPU insns not referencing memory, just copy thru. */ - case FPU: - VG_(copyUInstr)(cb, u_in); - break; - - default: - VG_(ppUInstr)(0, u_in); - VG_(panic)( "vg_instrument: unhandled case"); - - } /* end of switch (u_in->opcode) */ - - } /* end of for loop */ - - VG_(freeCodeBlock)(cb_in); - return cb; -} - -/*------------------------------------------------------------*/ -/*--- Clean up mem check instrumentation. ---*/ -/*------------------------------------------------------------*/ - -#define VGC_IS_SHADOW(tempreg) ((tempreg % 2) == 1) -#define VGC_UNDEF ((UChar)100) -#define VGC_VALUE ((UChar)101) - -#define NOP_no_msg(uu) \ - do { uu->opcode = NOP; } while (False) - -#define NOP_tag1_op(uu) \ - do { uu->opcode = NOP; \ - if (VG_(disassemble)) \ - VG_(printf)("at %d: delete %s due to defd arg\n", \ - i, VG_(nameOfTagOp(u->val3))); \ - } while (False) - -#define SETV_tag1_op(uu,newsz) \ - do { uu->opcode = SETV; \ - uu->size = newsz; \ - uu->tag2 = uu->tag3 = NoValue; \ - if (VG_(disassemble)) \ - VG_(printf)("at %d: convert %s to SETV%d " \ - "due to defd arg\n", \ - i, VG_(nameOfTagOp(u->val3)), newsz); \ - } while (False) - - - -/* Run backwards and delete SETVs on shadow temps for which the next - action is a write. Needs an env saying whether or not the next - action is a write. The supplied UCodeBlock is destructively - modified. 
-*/ -static void vg_delete_redundant_SETVs ( UCodeBlock* cb ) -{ - Bool* next_is_write; - Int i, j, k, n_temps; - UInstr* u; - TempUse tempUse[3]; - - n_temps = cb->nextTemp; - if (n_temps == 0) return; - - next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool)); - - for (i = 0; i < n_temps; i++) next_is_write[i] = True; - - for (i = cb->used-1; i >= 0; i--) { - u = &cb->instrs[i]; - - /* If we're not checking address V bits, there will be a lot of - GETVs, TAG1s and TAG2s calculating values which are never - used. These first three cases get rid of them. */ - - if (u->opcode == GETV && VGC_IS_SHADOW(u->val2) - && next_is_write[u->val2] - && !VG_(clo_check_addrVs)) { - u->opcode = NOP; - u->size = 0; - if (VG_(disassemble)) - VG_(printf)("at %d: delete GETV\n", i); - } else - - if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1) - && next_is_write[u->val1] - && !VG_(clo_check_addrVs)) { - u->opcode = NOP; - u->size = 0; - if (VG_(disassemble)) - VG_(printf)("at %d: delete TAG1\n", i); - } else - - if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2) - && next_is_write[u->val2] - && !VG_(clo_check_addrVs)) { - u->opcode = NOP; - u->size = 0; - if (VG_(disassemble)) - VG_(printf)("at %d: delete TAG2\n", i); - } else - - /* We do the rest of these regardless of whether or not - addresses are V-checked. */ - - if (u->opcode == MOV && VGC_IS_SHADOW(u->val2) - && next_is_write[u->val2]) { - /* This MOV is pointless because the target is dead at this - point. Delete it. */ - u->opcode = NOP; - u->size = 0; - if (VG_(disassemble)) - VG_(printf)("at %d: delete MOV\n", i); - } else - - if (u->opcode == SETV) { - if (u->tag1 == TempReg) { - vg_assert(VGC_IS_SHADOW(u->val1)); - if (next_is_write[u->val1]) { - /* This write is pointless, so annul it. */ - u->opcode = NOP; - u->size = 0; - if (VG_(disassemble)) - VG_(printf)("at %d: delete SETV\n", i); - } else { - /* This write has a purpose; don't annul it, but do - notice that we did it. 
*/ - next_is_write[u->val1] = True; - } - - } - - } else { - /* Find out what this insn does to the temps. */ - k = getTempUsage(u, &tempUse[0]); - vg_assert(k <= 3); - for (j = k-1; j >= 0; j--) { - next_is_write[ tempUse[j].tempNo ] - = tempUse[j].isWrite; - } - } - - } - - VG_(jitfree)(next_is_write); -} - - -/* Run forwards, propagating and using the is-completely-defined - property. This removes a lot of redundant tag-munging code. - Unfortunately it requires intimate knowledge of how each uinstr and - tagop modifies its arguments. This duplicates knowledge of uinstr - tempreg uses embodied in getTempUsage(), which is unfortunate. - The supplied UCodeBlock* is modified in-place. - - For each value temp, def[] should hold VGC_VALUE. - - For each shadow temp, def[] may hold 4,2,1 or 0 iff that shadow is - definitely known to be fully defined at that size. In all other - circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly - undefined. In cases of doubt, VGC_UNDEF is always safe. -*/ -static void vg_propagate_definedness ( UCodeBlock* cb ) -{ - UChar* def; - Int i, j, k, t, n_temps; - UInstr* u; - TempUse tempUse[3]; - - n_temps = cb->nextTemp; - if (n_temps == 0) return; - - def = VG_(jitmalloc)(n_temps * sizeof(UChar)); - for (i = 0; i < n_temps; i++) - def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE; - - /* Run forwards, detecting and using the all-defined property. */ - - for (i = 0; i < cb->used; i++) { - u = &cb->instrs[i]; - switch (u->opcode) { - - /* Tag-handling uinstrs. */ - - /* Deal with these quickly. */ - case NOP: - case INCEIP: - break; - - /* Make a tag defined. */ - case SETV: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - def[u->val1] = u->size; - break; - - /* Check definedness of a tag. 
*/ - case TESTV: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - if (def[u->val1] <= 4) { - vg_assert(def[u->val1] == u->size); - NOP_no_msg(u); - if (VG_(disassemble)) - VG_(printf)("at %d: delete TESTV on defd arg\n", i); - } - break; - - /* Applies to both values and tags. Propagate Definedness - property through copies. Note that this isn't optional; - we *have* to do this to keep def[] correct. */ - case MOV: - vg_assert(u->tag2 == TempReg); - if (u->tag1 == TempReg) { - if (VGC_IS_SHADOW(u->val1)) { - vg_assert(VGC_IS_SHADOW(u->val2)); - def[u->val2] = def[u->val1]; - } - } - break; - - case PUTV: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - if (def[u->val1] <= 4) { - vg_assert(def[u->val1] == u->size); - u->tag1 = Literal; - u->val1 = 0; - switch (u->size) { - case 4: u->lit32 = 0x00000000; break; - case 2: u->lit32 = 0xFFFF0000; break; - case 1: u->lit32 = 0xFFFFFF00; break; - default: VG_(panic)("vg_cleanup(PUTV)"); - } - if (VG_(disassemble)) - VG_(printf)( - "at %d: propagate definedness into PUTV\n", i); - } - break; - - case STOREV: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - if (def[u->val1] <= 4) { - vg_assert(def[u->val1] == u->size); - u->tag1 = Literal; - u->val1 = 0; - switch (u->size) { - case 4: u->lit32 = 0x00000000; break; - case 2: u->lit32 = 0xFFFF0000; break; - case 1: u->lit32 = 0xFFFFFF00; break; - default: VG_(panic)("vg_cleanup(STOREV)"); - } - if (VG_(disassemble)) - VG_(printf)( - "at %d: propagate definedness into STandV\n", i); - } - break; - - /* Nothing interesting we can do with this, I think. */ - case PUTVF: - break; - - /* Tag handling operations. */ - case TAG2: - vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); - vg_assert(u->tag3 == Lit16); - /* Ultra-paranoid "type" checking. 
*/ - switch (u->val3) { - case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ: - case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ: - case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ: - vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1)); - break; - default: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - break; - } - switch (u->val3) { - Int sz; - case VgT_UifU4: - sz = 4; goto do_UifU; - case VgT_UifU2: - sz = 2; goto do_UifU; - case VgT_UifU1: - sz = 1; goto do_UifU; - case VgT_UifU0: - sz = 0; goto do_UifU; - do_UifU: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2)); - if (def[u->val1] <= 4) { - /* UifU. The first arg is defined, so result is - simply second arg. Delete this operation. */ - vg_assert(def[u->val1] == sz); - NOP_no_msg(u); - if (VG_(disassemble)) - VG_(printf)( - "at %d: delete UifU%d due to defd arg1\n", - i, sz); - } - else - if (def[u->val2] <= 4) { - /* UifU. The second arg is defined, so result is - simply first arg. Copy to second. */ - vg_assert(def[u->val2] == sz); - u->opcode = MOV; - u->size = 4; - u->tag3 = NoValue; - def[u->val2] = def[u->val1]; - if (VG_(disassemble)) - VG_(printf)( - "at %d: change UifU%d to MOV due to defd" - " arg2\n", - i, sz); - } - break; - case VgT_ImproveAND4_TQ: - sz = 4; goto do_ImproveAND; - case VgT_ImproveAND1_TQ: - sz = 1; goto do_ImproveAND; - do_ImproveAND: - /* Implements Q = T OR Q. So if Q is entirely defined, - ie all 0s, we get MOV T, Q. 
*/ - if (def[u->val2] <= 4) { - vg_assert(def[u->val2] == sz); - u->size = 4; /* Regardless of sz */ - u->opcode = MOV; - u->tag3 = NoValue; - def[u->val2] = VGC_UNDEF; - if (VG_(disassemble)) - VG_(printf)( - "at %d: change ImproveAND%d_TQ to MOV due " - "to defd arg2\n", - i, sz); - } - break; - default: - goto unhandled; - } - break; - - case TAG1: - vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1)); - if (def[u->val1] > 4) break; - /* We now know that the arg to the op is entirely defined. - If the op changes the size of the arg, we must replace - it with a SETV at the new size. If it doesn't change - the size, we can delete it completely. */ - switch (u->val3) { - /* Maintain the same size ... */ - case VgT_Left4: - vg_assert(def[u->val1] == 4); - NOP_tag1_op(u); - break; - case VgT_PCast11: - vg_assert(def[u->val1] == 1); - NOP_tag1_op(u); - break; - /* Change size ... */ - case VgT_PCast40: - vg_assert(def[u->val1] == 4); - SETV_tag1_op(u,0); - def[u->val1] = 0; - break; - case VgT_PCast14: - vg_assert(def[u->val1] == 1); - SETV_tag1_op(u,4); - def[u->val1] = 4; - break; - case VgT_PCast12: - vg_assert(def[u->val1] == 1); - SETV_tag1_op(u,2); - def[u->val1] = 2; - break; - case VgT_PCast10: - vg_assert(def[u->val1] == 1); - SETV_tag1_op(u,0); - def[u->val1] = 0; - break; - case VgT_PCast02: - vg_assert(def[u->val1] == 0); - SETV_tag1_op(u,2); - def[u->val1] = 2; - break; - default: - goto unhandled; - } - if (VG_(disassemble)) - VG_(printf)( - "at %d: delete TAG1 %s due to defd arg\n", - i, VG_(nameOfTagOp(u->val3))); - break; - - default: - unhandled: - /* We don't know how to handle this uinstr. Be safe, and - set to VGC_VALUE or VGC_UNDEF all temps written by it. */ - k = getTempUsage(u, &tempUse[0]); - vg_assert(k <= 3); - for (j = 0; j < k; j++) { - t = tempUse[j].tempNo; - vg_assert(t >= 0 && t < n_temps); - if (!tempUse[j].isWrite) { - /* t is read; ignore it. 
*/ - if (0&& VGC_IS_SHADOW(t) && def[t] <= 4) - VG_(printf)("ignoring def %d at %s %s\n", - def[t], - VG_(nameUOpcode)(True, u->opcode), - (u->opcode == TAG1 || u->opcode == TAG2) - ? VG_(nameOfTagOp)(u->val3) - : (Char*)""); - } else { - /* t is written; better nullify it. */ - def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE; - } - } - } - } - - VG_(jitfree)(def); -} - - -/* Top level post-instrumentation cleanup function. */ -static void vg_cleanup ( UCodeBlock* cb ) -{ - vg_propagate_definedness ( cb ); - vg_delete_redundant_SETVs ( cb ); -} - - -/*------------------------------------------------------------*/ -/*--- Main entry point for the JITter. ---*/ -/*------------------------------------------------------------*/ - -/* Translate the basic block beginning at orig_addr, placing the - translation in a vg_malloc'd block, the address and size of which - are returned in trans_addr and trans_size. Length of the original - block is also returned in orig_size. If the latter three are NULL, - this call is being done for debugging purposes, in which case (a) - throw away the translation once it is made, and (b) produce a load - of debugging output. -*/ -void VG_(translate) ( ThreadState* tst, - /* Identity of thread needing this block */ - Addr orig_addr, - UInt* orig_size, - Addr* trans_addr, - UInt* trans_size ) -{ - Int n_disassembled_bytes, final_code_size; - Bool debugging_translation; - UChar* final_code; - UCodeBlock* cb; - - VGP_PUSHCC(VgpTranslate); - debugging_translation - = orig_size == NULL || trans_addr == NULL || trans_size == NULL; - - dis = True; - dis = debugging_translation; - - /* Check if we're being asked to jump to a silly address, and if so - record an error message before potentially crashing the entire - system. 
*/ - if (VG_(clo_instrument) && !debugging_translation && !dis) { - Addr bad_addr; - Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr ); - if (!ok) { - VG_(record_jump_error)(tst, bad_addr); - } - } - - /* if (VG_(overall_in_count) >= 4800) dis=True; */ - if (VG_(disassemble)) - VG_(printf)("\n"); - if (0 || dis - || (VG_(overall_in_count) > 0 && - (VG_(overall_in_count) % 1000 == 0))) { - if (0&& (VG_(clo_verbosity) > 1 || dis)) - VG_(message)(Vg_UserMsg, - "trans# %d, bb# %lu, in %d, out %d", - VG_(overall_in_count), - VG_(bbs_done), - VG_(overall_in_osize), VG_(overall_in_tsize), - orig_addr ); - } - cb = VG_(allocCodeBlock)(); - - /* Disassemble this basic block into cb. */ - /* VGP_PUSHCC(VgpToUCode); */ - n_disassembled_bytes = VG_(disBB) ( cb, orig_addr ); - /* VGP_POPCC; */ - /* dis=True; */ - /* if (0&& VG_(translations_done) < 617) */ - /* dis=False; */ - /* Try and improve the code a bit. */ - if (VG_(clo_optimise)) { - /* VGP_PUSHCC(VgpImprove); */ - vg_improve ( cb ); - if (VG_(disassemble)) - VG_(ppUCodeBlock) ( cb, "Improved code:" ); - /* VGP_POPCC; */ - } - /* dis=False; */ - /* Add instrumentation code. */ - if (VG_(clo_instrument)) { - /* VGP_PUSHCC(VgpInstrument); */ - cb = vg_instrument(cb); - /* VGP_POPCC; */ - if (VG_(disassemble)) - VG_(ppUCodeBlock) ( cb, "Instrumented code:" ); - if (VG_(clo_cleanup)) { - /* VGP_PUSHCC(VgpCleanup); */ - vg_cleanup(cb); - /* VGP_POPCC; */ - if (VG_(disassemble)) - VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" ); - } - } - - //VG_(disassemble) = True; - - /* Add cache simulation code. */ - if (VG_(clo_cachesim)) { - /* VGP_PUSHCC(VgpCacheInstrument); */ - cb = VG_(cachesim_instrument)(cb, orig_addr); - /* VGP_POPCC; */ - if (VG_(disassemble)) - VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" ); - } - - //VG_(disassemble) = False; - - /* Allocate registers. 
*/ - /* VGP_PUSHCC(VgpRegAlloc); */ - cb = vg_do_register_allocation ( cb ); - /* VGP_POPCC; */ - /* dis=False; */ - /* - if (VG_(disassemble)) - VG_(ppUCodeBlock) ( cb, "After Register Allocation:"); - */ - - /* VGP_PUSHCC(VgpFromUcode); */ - /* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc) - and so must be VG_(jitfree)'d. */ - final_code = VG_(emit_code)(cb, &final_code_size ); - /* VGP_POPCC; */ - VG_(freeCodeBlock)(cb); - - if (debugging_translation) { - /* Only done for debugging -- throw away final result. */ - VG_(jitfree)(final_code); - } else { - /* Doing it for real -- return values to caller. */ - *orig_size = n_disassembled_bytes; - *trans_addr = (Addr)final_code; - *trans_size = final_code_size; - } - VGP_POPCC; -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_translate.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_transtab.c b/coregrind/vg_transtab.c deleted file mode 100644 index a6e15b3053..0000000000 --- a/coregrind/vg_transtab.c +++ /dev/null @@ -1,566 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Management of the translation table and cache. ---*/ -/*--- vg_transtab.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" -#include "vg_constants.h" - -/* #define DEBUG_TRANSTAB */ - - -/*------------------------------------------------------------*/ -/*--- Management of the LRU-based translation table+cache. ---*/ -/*------------------------------------------------------------*/ - -/* These sizes were set up so as to be able to debug large KDE 3 - applications (are there any small ones?) without excessive amounts - of code retranslation. */ - -/* Size of the translation cache, in bytes. */ -#define VG_TC_SIZE /*1000000*/ /*16000000*/ 32000000 /*40000000*/ - -/* Do a LRU pass when the translation cache becomes this full. */ -#define VG_TC_LIMIT_PERCENT 98 - -/* When doing an LRU pass, reduce TC fullness to this level. */ -#define VG_TC_TARGET_PERCENT 85 - -/* Number of entries in the translation table. This must be a prime - number in order to make the hashing work properly. */ -#define VG_TT_SIZE /*5281*/ /*100129*/ 200191 /*250829*/ - -/* Do an LRU pass when the translation table becomes this full. */ -#define VG_TT_LIMIT_PERCENT /*67*/ 80 - -/* When doing an LRU pass, reduce TT fullness to this level. */ -#define VG_TT_TARGET_PERCENT /*60*/ 70 - -/* The number of age steps we track. 0 means the current epoch, - N_EPOCHS-1 means used the epoch N_EPOCHS-1 or more ago. */ -#define VG_N_EPOCHS /*2000*/ /*4000*/ 20000 - -/* This TT entry is empty. There is no associated TC storage. */ -#define VG_TTE_EMPTY ((Addr)1) -/* This TT entry has been deleted, in the sense that it does not - contribute to the orig->trans mapping. However, the ex-translation - it points at still occupies space in TC. 
This slot cannot be - re-used without doing an LRU pass. */ -#define VG_TTE_DELETED ((Addr)3) - -/* The TC. This used to be statically allocated, but that forces many - SecMap arrays to be pointlessly allocated at startup, bloating the - process size by about 22M and making startup slow. So now we - dynamically allocate it at startup time. - was: static UChar vg_tc[VG_TC_SIZE]; -*/ -static UChar* vg_tc = NULL; - -/* Count of bytes used in the TC. This includes those pointed to from - VG_TTE_DELETED entries. */ -static Int vg_tc_used = 0; - -/* The TT. Like TC, for the same reason, is dynamically allocated at - startup. - was: static TTEntry vg_tt[VG_TT_SIZE]; -*/ -static TTEntry* vg_tt = NULL; - -/* Count of non-empty TT entries. This includes deleted ones. */ -static Int vg_tt_used = 0; - -/* Fast helper for the TT. A direct-mapped cache which holds a - pointer to a TT entry which may or may not be the correct one, but - which we hope usually is. This array is referred to directly from - vg_dispatch.S. */ -Addr VG_(tt_fast)[VG_TT_FAST_SIZE]; - -/* For reading/writing the misaligned TT-index word at immediately - preceding every translation in TC. */ -#if 0 - /* Big sigh. However reasonable this seems, there are those who - set AC in %EFLAGS (Alignment Check) to 1, causing bus errors. A - proper solution is for valgrind to properly virtualise AC, like - the other flags (DOSZACP). The current cheap hack simply avoids - all misaligned accesses, so valgrind doesn't fault even if AC is - set. 
*/ -# define VG_READ_MISALIGNED_WORD(aaa) (*((UInt*)(aaa))) -# define VG_WRITE_MISALIGNED_WORD(aaa,vvv) *((UInt*)(aaa)) = ((UInt)(vvv)) -#else - static __inline__ - UInt VG_READ_MISALIGNED_WORD ( Addr aaa ) - { - UInt w = 0; - UChar* p = (UChar*)aaa; - w = 0xFF & ((UInt)(p[3])); - w = (w << 8) | (0xFF & ((UInt)(p[2]))); - w = (w << 8) | (0xFF & ((UInt)(p[1]))); - w = (w << 8) | (0xFF & ((UInt)(p[0]))); - return w; - } - - static __inline__ - void VG_WRITE_MISALIGNED_WORD ( Addr aaa, UInt vvv ) - { - UChar* p = (UChar*)aaa; - p[0] = vvv & 0xFF; - p[1] = (vvv >> 8) & 0xFF; - p[2] = (vvv >> 16) & 0xFF; - p[3] = (vvv >> 24) & 0xFF; - } -#endif - - -/* Used for figuring out an age threshold for translations. */ -static Int vg_bytes_in_epoch[VG_N_EPOCHS]; -static Int vg_entries_in_epoch[VG_N_EPOCHS]; - - -/* Just so these counts can be queried without making them globally - visible. */ -void VG_(get_tt_tc_used) ( UInt* tt_used, UInt* tc_used ) -{ - *tt_used = vg_tt_used; - *tc_used = vg_tc_used; -} - - -/* Do the LRU thing on TT/TC, clearing them back to the target limits - if they are over the threshold limits. -*/ -void VG_(maybe_do_lru_pass) ( void ) -{ - Int i, j, r, w, thresh, ttno; - TTEntry* tte; - - const Int tc_limit = (Int)(((double)VG_TC_SIZE * (double)VG_TC_LIMIT_PERCENT) - / (double)100.0); - const Int tt_limit = (Int)(((double)VG_TT_SIZE * (double)VG_TT_LIMIT_PERCENT) - / (double)100.0); - const Int tc_target = (Int)(((double)VG_TC_SIZE * (double)VG_TC_TARGET_PERCENT) - / (double)100.0); - const Int tt_target = (Int)(((double)VG_TT_SIZE * (double)VG_TT_TARGET_PERCENT) - / (double)100.0); - - /* Decide quickly if we need to do an LRU pass ? 
*/ - if (vg_tc_used <= tc_limit && vg_tt_used <= tt_limit) - return; - -# ifdef DEBUG_TRANSTAB - VG_(sanity_check_tc_tt)(); -# endif - - VGP_PUSHCC(VgpDoLRU); - /* - VG_(printf)( - "limits: tc_limit %d, tt_limit %d, tc_target %d, tt_target %d\n", - tc_limit, tt_limit, tc_target, tt_target); - */ - - if (VG_(clo_verbosity) > 2) - VG_(printf)(" pre-LRU: tc %d (target %d), tt %d (target %d)\n", - vg_tc_used, tc_target, vg_tt_used, tt_target); - - /* Yes we do. Figure out what threshold age is required in order to - shrink both the TC and TT occupancy below TC_TARGET_PERCENT and - TT_TARGET_PERCENT respectively. */ - - VG_(number_of_lrus)++; - - /* Count the number of TC bytes and TT entries in each epoch. */ - for (i = 0; i < VG_N_EPOCHS; i++) - vg_bytes_in_epoch[i] = vg_entries_in_epoch[i] = 0; - - for (i = 0; i < VG_TT_SIZE; i++) { - if (vg_tt[i].orig_addr == VG_TTE_EMPTY - || vg_tt[i].orig_addr == VG_TTE_DELETED) - continue; - j = vg_tt[i].mru_epoch; - vg_assert(j <= VG_(current_epoch)); - j = VG_(current_epoch) - j; - if (j >= VG_N_EPOCHS) j = VG_N_EPOCHS-1; - vg_assert(0 <= j && j < VG_N_EPOCHS); - /* Greater j now means older. */ - vg_entries_in_epoch[j]++; - vg_bytes_in_epoch[j] += 4+vg_tt[i].trans_size; - } - - /* - for (i = 0; i < VG_N_EPOCHS; i++) - VG_(printf)("epoch %d: ents %d, bytes %d\n", - i, vg_entries_in_epoch[i], vg_bytes_in_epoch[i]); - */ - - /* Cumulatise. Make vg_{bytes,entries}_in_epoch[n] contain the - counts for itself and all younger epochs. 
*/ - for (i = 1; i < VG_N_EPOCHS; i++) { - vg_entries_in_epoch[i] += vg_entries_in_epoch[i-1]; - vg_bytes_in_epoch[i] += vg_bytes_in_epoch[i-1]; - } - - for (thresh = 0; thresh < VG_N_EPOCHS; thresh++) { - if (vg_entries_in_epoch[thresh] > tt_target - || vg_bytes_in_epoch[thresh] >= tc_target) - break; - } - - if (VG_(clo_verbosity) > 2) - VG_(printf)( - " LRU: discard translations %d or more epochs since last use\n", - thresh - ); - - thresh = VG_(current_epoch) - thresh; - - /* Ok, so we will hit our targets if we retain all entries most - recently used at most thresh epochs ago. Traverse the TT and - mark such entries as deleted. */ - for (i = 0; i < VG_TT_SIZE; i++) { - if (vg_tt[i].orig_addr == VG_TTE_EMPTY - || vg_tt[i].orig_addr == VG_TTE_DELETED) - continue; - if (vg_tt[i].mru_epoch <= thresh) { - vg_tt[i].orig_addr = VG_TTE_DELETED; - VG_(this_epoch_out_count) ++; - VG_(this_epoch_out_osize) += vg_tt[i].orig_size; - VG_(this_epoch_out_tsize) += vg_tt[i].trans_size; - VG_(overall_out_count) ++; - VG_(overall_out_osize) += vg_tt[i].orig_size; - VG_(overall_out_tsize) += vg_tt[i].trans_size; - } - } - - /* Now compact the TC, sliding live entries downwards to fill spaces - left by deleted entries. In this loop, r is the offset in TC of - the current translation under consideration, and w is the next - allocation point. */ - r = w = 0; - while (True) { - if (r >= vg_tc_used) break; - /* The first four bytes of every translation contain the index - of its TT entry. The TT entry's .trans_addr field points at - the start of the code proper, not at this 4-byte index, so - that we don't constantly have to keep adding 4 in the main - lookup/dispatch loop. */ - - ttno = VG_READ_MISALIGNED_WORD((Addr)(&vg_tc[r])); - vg_assert(ttno >= 0 && ttno < VG_TT_SIZE); - tte = & vg_tt[ ttno ]; - vg_assert(tte->orig_addr != VG_TTE_EMPTY); - if (tte->orig_addr != VG_TTE_DELETED) { - /* We want to keep this one alive. */ - /* Sanity check the pointer back to TC. 
*/ - vg_assert(tte->trans_addr == (Addr)&vg_tc[r+4]); - for (i = 0; i < 4+tte->trans_size; i++) - vg_tc[w+i] = vg_tc[r+i]; - tte->trans_addr = (Addr)&vg_tc[w+4]; - w += 4+tte->trans_size; - } else { - tte->orig_addr = VG_TTE_EMPTY; - vg_tt_used--; - } - r += 4+tte->trans_size; - } - /* should have traversed an exact number of translations, with no - slop at the end. */ - vg_assert(w <= r); - vg_assert(r == vg_tc_used); - vg_assert(w <= r); - vg_assert(w <= tc_target); - vg_tc_used = w; - - vg_assert(vg_tt_used >= 0); - vg_assert(vg_tt_used <= tt_target); - - /* Invalidate the fast cache, since it is now out of date. It will get - reconstructed incrementally when the client resumes. */ - VG_(invalidate_tt_fast)(); - - if (VG_(clo_verbosity) > 2) - VG_(printf)("post-LRU: tc %d (target %d), tt %d (target %d)\n", - vg_tc_used, tc_target, vg_tt_used, tt_target); - - if (VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg, - "epoch %d (bb %luk): thresh %d, " - "out %d (%dk -> %dk), new TT %d, TC %dk", - VG_(current_epoch), - VG_(bbs_done) / 1000, - VG_(current_epoch) - thresh, - VG_(this_epoch_out_count), - VG_(this_epoch_out_osize) / 1000, - VG_(this_epoch_out_tsize) / 1000, - vg_tt_used, vg_tc_used / 1000 - ); - - /* Reconstruct the SMC detection structures. */ -# ifdef DEBUG_TRANSTAB - for (i = 0; i < VG_TT_SIZE; i++) - vg_assert(vg_tt[i].orig_addr != VG_TTE_DELETED); -# endif - VG_(sanity_check_tc_tt)(); - - VGP_POPCC; -} - - -/* Do a sanity check on TT/TC. 
-*/ -void VG_(sanity_check_tc_tt) ( void ) -{ - Int i, counted_entries, counted_bytes; - TTEntry* tte; - counted_entries = 0; - counted_bytes = 0; - for (i = 0; i < VG_TT_SIZE; i++) { - tte = &vg_tt[i]; - if (tte->orig_addr == VG_TTE_EMPTY) continue; - vg_assert(tte->mru_epoch >= 0); - vg_assert(tte->mru_epoch <= VG_(current_epoch)); - counted_entries++; - counted_bytes += 4+tte->trans_size; - vg_assert(tte->trans_addr >= (Addr)&vg_tc[4]); - vg_assert(tte->trans_addr < (Addr)&vg_tc[vg_tc_used]); - vg_assert(VG_READ_MISALIGNED_WORD(tte->trans_addr-4) == i); - } - vg_assert(counted_entries == vg_tt_used); - vg_assert(counted_bytes == vg_tc_used); -} - - -/* Add this already-filled-in entry to the TT. Assumes that the - relevant code chunk has been placed in TC, along with a dummy back - pointer, which is inserted here. -*/ -extern void VG_(add_to_trans_tab) ( TTEntry* tte ) -{ - Int i; - /* - VG_(printf)("add_to_trans_tab(%d) %x %d %x %d\n", - vg_tt_used, tte->orig_addr, tte->orig_size, - tte->trans_addr, tte->trans_size); - */ - vg_assert(tte->orig_addr != VG_TTE_DELETED - && tte->orig_addr != VG_TTE_EMPTY); - /* Hash to get initial probe point. */ - i = ((UInt)(tte->orig_addr)) % VG_TT_SIZE; - while (True) { - if (vg_tt[i].orig_addr == tte->orig_addr) - VG_(panic)("add_to_trans_tab: duplicate"); - if (vg_tt[i].orig_addr == VG_TTE_EMPTY) { - /* Put it here, and set the back pointer. */ - vg_tt[i] = *tte; - VG_WRITE_MISALIGNED_WORD(tte->trans_addr-4, i); - vg_tt_used++; - return; - } - i++; - if (i == VG_TT_SIZE) i = 0; - } -} - - -/* Copy a new translation's code into TC, leaving a 4-byte hole for - the back pointer, and returning a pointer to the code proper (not - the hole) in TC. -*/ -Addr VG_(copy_to_transcache) ( Addr trans_addr, Int trans_size ) -{ - Int i; - Addr ret_addr; - if (4+trans_size > VG_TC_SIZE-vg_tc_used) - VG_(panic)("copy_to_transcache: not enough free space?!"); - /* Leave a hole for the back pointer to the TT entry. 
*/ - vg_tc_used += 4; - ret_addr = (Addr)&vg_tc[vg_tc_used]; - for (i = 0; i < trans_size; i++) - vg_tc[vg_tc_used+i] = ((UChar*)trans_addr)[i]; - vg_tc_used += trans_size; - return ret_addr; -} - - -/* Invalidate the tt_fast cache, for whatever reason. Tricky. We - have to find a TTE_EMPTY slot to point all entries at. */ -void VG_(invalidate_tt_fast)( void ) -{ - Int i, j; - for (i = 0; i < VG_TT_SIZE && vg_tt[i].orig_addr != VG_TTE_EMPTY; i++) - ; - vg_assert(i < VG_TT_SIZE - && vg_tt[i].orig_addr == VG_TTE_EMPTY); - for (j = 0; j < VG_TT_FAST_SIZE; j++) - VG_(tt_fast)[j] = (Addr)&vg_tt[i]; -} - - -/* Search TT to find the translated address of the supplied original, - or NULL if not found. This routine is used when we miss in - VG_(tt_fast). -*/ -static __inline__ TTEntry* search_trans_table ( Addr orig_addr ) -{ - //static Int queries = 0; - //static Int probes = 0; - Int i; - /* Hash to get initial probe point. */ - // if (queries == 10000) { - // VG_(printf)("%d queries, %d probes\n", queries, probes); - // queries = probes = 0; - //} - //queries++; - i = ((UInt)orig_addr) % VG_TT_SIZE; - while (True) { - //probes++; - if (vg_tt[i].orig_addr == orig_addr) - return &vg_tt[i]; - if (vg_tt[i].orig_addr == VG_TTE_EMPTY) - return NULL; - i++; - if (i == VG_TT_SIZE) i = 0; - } -} - - -/* Find the translation address for a given (original) code address. - If found, update VG_(tt_fast) so subsequent lookups are fast. If - no translation can be found, return zero. This routine is (the - only one) called from vg_run_innerloop. */ -Addr VG_(search_transtab) ( Addr original_addr ) -{ - TTEntry* tte; - VGP_PUSHCC(VgpSlowFindT); - tte = search_trans_table ( original_addr ); - if (tte == NULL) { - /* We didn't find it. vg_run_innerloop will have to request a - translation. */ - VGP_POPCC; - return (Addr)0; - } else { - /* Found it. Put the search result into the fast cache now. - Also set the mru_epoch to mark this translation as used. 
*/ - UInt cno = (UInt)original_addr & VG_TT_FAST_MASK; - VG_(tt_fast)[cno] = (Addr)tte; - VG_(tt_fast_misses)++; - tte->mru_epoch = VG_(current_epoch); - VGP_POPCC; - return tte->trans_addr; - } -} - - -/* Invalidate translations of original code [start .. start + range - 1]. - This is slow, so you *really* don't want to call it very often. -*/ -void VG_(invalidate_translations) ( Addr start, UInt range ) -{ - Addr i_start, i_end, o_start, o_end; - UInt out_count, out_osize, out_tsize; - Int i; - -# ifdef DEBUG_TRANSTAB - VG_(sanity_check_tc_tt)(); -# endif - i_start = start; - i_end = start + range - 1; - out_count = out_osize = out_tsize = 0; - - for (i = 0; i < VG_TT_SIZE; i++) { - if (vg_tt[i].orig_addr == VG_TTE_EMPTY - || vg_tt[i].orig_addr == VG_TTE_DELETED) continue; - o_start = vg_tt[i].orig_addr; - o_end = o_start + vg_tt[i].orig_size - 1; - if (o_end < i_start || o_start > i_end) - continue; - if (VG_(clo_cachesim)) - VG_(cachesim_notify_discard)( & vg_tt[i] ); - vg_tt[i].orig_addr = VG_TTE_DELETED; - VG_(this_epoch_out_count) ++; - VG_(this_epoch_out_osize) += vg_tt[i].orig_size; - VG_(this_epoch_out_tsize) += vg_tt[i].trans_size; - VG_(overall_out_count) ++; - VG_(overall_out_osize) += vg_tt[i].orig_size; - VG_(overall_out_tsize) += vg_tt[i].trans_size; - out_count ++; - out_osize += vg_tt[i].orig_size; - out_tsize += vg_tt[i].trans_size; - } - - if (out_count > 0) { - VG_(invalidate_tt_fast)(); - VG_(sanity_check_tc_tt)(); -# ifdef DEBUG_TRANSTAB - { Addr aa; - for (aa = i_start; aa <= i_end; aa++) - vg_assert(search_trans_table ( aa ) == NULL); - } -# endif - } - - if (1|| VG_(clo_verbosity) > 1) - VG_(message)(Vg_UserMsg, - "discard %d (%d -> %d) translations in range %p .. %p", - out_count, out_osize, out_tsize, i_start, i_end ); -} - - -/*------------------------------------------------------------*/ -/*--- Initialisation. 
---*/ -/*------------------------------------------------------------*/ - -void VG_(init_tt_tc) ( void ) -{ - Int i; - - /* Allocate the translation table and translation cache. */ - vg_assert(vg_tc == NULL); - vg_tc = VG_(get_memory_from_mmap) ( VG_TC_SIZE * sizeof(UChar), - "trans-cache" ); - vg_assert(vg_tc != NULL); - - vg_assert(vg_tt == NULL); - vg_tt = VG_(get_memory_from_mmap) ( VG_TT_SIZE * sizeof(TTEntry), - "trans-table" ); - vg_assert(vg_tt != NULL); - - /* The main translation table is empty. */ - vg_tt_used = 0; - for (i = 0; i < VG_TT_SIZE; i++) { - vg_tt[i].orig_addr = VG_TTE_EMPTY; - } - - /* The translation table's fast cache is empty. Point all entries - at the first TT entry, which is, of course, empty. */ - for (i = 0; i < VG_TT_FAST_SIZE; i++) - VG_(tt_fast)[i] = (Addr)(&vg_tt[0]); -} - -/*--------------------------------------------------------------------*/ -/*--- end vg_transtab.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_unsafe.h b/coregrind/vg_unsafe.h deleted file mode 100644 index 0f726468d0..0000000000 --- a/coregrind/vg_unsafe.h +++ /dev/null @@ -1,91 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- A header file for making sense of syscalls. Unsafe in the ---*/ -/*--- sense that we don't call any functions mentioned herein. ---*/ -/*--- vg_unsafe.h ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - - -/* These includes are only used for making sense of the args for - system calls. */ -#include /* for system call numbers */ -#include /* for PROT_* */ -#include /* for uname */ -#include /* for struct timeval & struct timezone */ -#include /* for the SYS_* constants */ -#include /* for struct rlimit */ -#include /* for struct shmid_ds & struct ipc_perm */ -#include /* for struct msghdr */ -#include /* for sockaddr_un */ -#include /* for struct ifreq et al */ -#include /* for struct arpreq */ -#include /* for struct rtentry */ -#include /* for struct ipc_kludge */ -#include /* for struct msgbuf */ -#include /* for struct sembuf */ - -#include /* for ISDN ioctls */ -#include /* for the SG_* ioctls */ -#include /* for struct sched_param */ -#include /* for struct __sysctl_args */ -#include /* for cd-rom ioctls */ - -#define __USE_LARGEFILE64 -#include /* for struct stat */ -#undef __USE_LARGEFILE64 - -#include /* for stuff for dealing with ioctl :( */ -#include /* for various soundcard ioctl constants :( */ - -#ifndef GLIBC_2_1 -# include /* for RTC_* ioctls */ -#endif - -#include -#include - -/* 2.2 stuff ... 
*/ -#include - -/* Both */ -#include -#include /* for struct tms */ - -/* 2.0 at least, for gid_t and loff_t */ -#include - -#include - -#include - -#include - - -/*--------------------------------------------------------------------*/ -/*--- end vg_unsafe.h ---*/ -/*--------------------------------------------------------------------*/ diff --git a/coregrind/vg_valgrinq_dummy.c b/coregrind/vg_valgrinq_dummy.c deleted file mode 100644 index a0b14410ea..0000000000 --- a/coregrind/vg_valgrinq_dummy.c +++ /dev/null @@ -1,43 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Used to make a dummy valgrinq.so, which does nothing at all. ---*/ -/*--- vg_valgrinq_dummy.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -/* For the rationale behind this file, look at - VG_(mash_LD_PRELOAD_string) in vg_main.c. 
*/ - -/* Remember not to use a variable of this name in any program you want - to debug :-) */ -int dont_mess_with_the_RSCDS = 0; - -/* If you are bored, perhaps have a look at http://www.rscds.org. */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_valgrinq_dummy.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/helgrind/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - 
vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/include/valgrind.h b/include/valgrind.h deleted file mode 100644 index 5a819c78ae..0000000000 --- a/include/valgrind.h +++ /dev/null @@ -1,243 +0,0 @@ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - - -#ifndef __VALGRIND_H -#define __VALGRIND_H - - -/* This file is for inclusion into client (your!) code. - - You can use these macros to manipulate and query memory permissions - inside your own programs. - - The resulting executables will still run without Valgrind, just a - little bit more slowly than they otherwise would, but otherwise - unchanged. - - When run on Valgrind with --client-perms=yes, Valgrind observes - these macro calls and takes appropriate action. When run on - Valgrind with --client-perms=no (the default), Valgrind observes - these macro calls but does not take any action as a result. */ - - - -/* This defines the magic code sequence which the JITter spots and - handles magically. Don't look too closely at this; it will rot - your brain. Valgrind dumps the result value in %EDX, so we first - copy the default value there, so that it is returned when not - running on Valgrind. Since %EAX points to a block of mem - containing the args, you can pass as many args as you want like - this. Currently this is set up to deal with 4 args since that's - the max that we appear to need (pthread_create). 
-*/ -#define VALGRIND_MAGIC_SEQUENCE( \ - _zzq_rlval, /* result lvalue */ \ - _zzq_default, /* result returned when running on real CPU */ \ - _zzq_request, /* request code */ \ - _zzq_arg1, /* request first param */ \ - _zzq_arg2, /* request second param */ \ - _zzq_arg3, /* request third param */ \ - _zzq_arg4 /* request fourth param */ ) \ - \ - { volatile unsigned int _zzq_args[5]; \ - _zzq_args[0] = (volatile unsigned int)(_zzq_request); \ - _zzq_args[1] = (volatile unsigned int)(_zzq_arg1); \ - _zzq_args[2] = (volatile unsigned int)(_zzq_arg2); \ - _zzq_args[3] = (volatile unsigned int)(_zzq_arg3); \ - _zzq_args[4] = (volatile unsigned int)(_zzq_arg4); \ - asm volatile("movl %1, %%eax\n\t" \ - "movl %2, %%edx\n\t" \ - "roll $29, %%eax ; roll $3, %%eax\n\t" \ - "rorl $27, %%eax ; rorl $5, %%eax\n\t" \ - "roll $13, %%eax ; roll $19, %%eax\n\t" \ - "movl %%edx, %0\t" \ - : "=r" (_zzq_rlval) \ - : "r" (&_zzq_args[0]), "r" (_zzq_default) \ - : "eax", "edx", "cc", "memory" \ - ); \ - } - - -/* Some request codes. There are many more of these, but most are not - exposed to end-user view. These are the public ones, all of the - form 0x1000 + small_number. -*/ - -#define VG_USERREQ__MAKE_NOACCESS 0x1001 -#define VG_USERREQ__MAKE_WRITABLE 0x1002 -#define VG_USERREQ__MAKE_READABLE 0x1003 -#define VG_USERREQ__DISCARD 0x1004 -#define VG_USERREQ__CHECK_WRITABLE 0x1005 -#define VG_USERREQ__CHECK_READABLE 0x1006 -#define VG_USERREQ__MAKE_NOACCESS_STACK 0x1007 -#define VG_USERREQ__RUNNING_ON_VALGRIND 0x1008 -#define VG_USERREQ__DO_LEAK_CHECK 0x1009 /* untested */ -#define VG_USERREQ__DISCARD_TRANSLATIONS 0x100A - - -/* Client-code macros to manipulate the state of memory. */ - -/* Mark memory at _qzz_addr as unaddressible and undefined for - _qzz_len bytes. Returns an int handle pertaining to the block - descriptions Valgrind will use in subsequent error messages. 
*/ -#define VALGRIND_MAKE_NOACCESS(_qzz_addr,_qzz_len) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */, \ - VG_USERREQ__MAKE_NOACCESS, \ - _qzz_addr, _qzz_len, 0, 0); \ - _qzz_res; \ - }) - -/* Similarly, mark memory at _qzz_addr as addressible but undefined - for _qzz_len bytes. */ -#define VALGRIND_MAKE_WRITABLE(_qzz_addr,_qzz_len) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */, \ - VG_USERREQ__MAKE_WRITABLE, \ - _qzz_addr, _qzz_len, 0, 0); \ - _qzz_res; \ - }) - -/* Similarly, mark memory at _qzz_addr as addressible and defined - for _qzz_len bytes. */ -#define VALGRIND_MAKE_READABLE(_qzz_addr,_qzz_len) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */, \ - VG_USERREQ__MAKE_READABLE, \ - _qzz_addr, _qzz_len, 0, 0); \ - _qzz_res; \ - }) - -/* Discard a block-description-handle obtained from the above three - macros. After this, Valgrind will no longer be able to relate - addressing errors to the user-defined block associated with the - handle. The permissions settings associated with the handle remain - in place. Returns 1 for an invalid handle, 0 for a valid - handle. */ -#define VALGRIND_DISCARD(_qzz_blkindex) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* default return */, \ - VG_USERREQ__DISCARD, \ - 0, _qzz_blkindex, 0, 0); \ - _qzz_res; \ - }) - - - -/* Client-code macros to check the state of memory. */ - -/* Check that memory at _qzz_addr is addressible for _qzz_len bytes. - If suitable addressibility is not established, Valgrind prints an - error message and returns the address of the first offending byte. - Otherwise it returns zero. 
*/ -#define VALGRIND_CHECK_WRITABLE(_qzz_addr,_qzz_len) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \ - VG_USERREQ__CHECK_WRITABLE, \ - _qzz_addr, _qzz_len, 0, 0); \ - _qzz_res; \ - }) - -/* Check that memory at _qzz_addr is addressible and defined for - _qzz_len bytes. If suitable addressibility and definedness are not - established, Valgrind prints an error message and returns the - address of the first offending byte. Otherwise it returns zero. */ -#define VALGRIND_CHECK_READABLE(_qzz_addr,_qzz_len) \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \ - VG_USERREQ__CHECK_READABLE, \ - _qzz_addr, _qzz_len, 0, 0); \ - _qzz_res; \ - }) - - -/* Use this macro to force the definedness and addressibility of a - value to be checked. If suitable addressibility and definedness - are not established, Valgrind prints an error message and returns - the address of the first offending byte. Otherwise it returns - zero. */ -#define VALGRIND_CHECK_DEFINED(__lvalue) \ - (void) \ - VALGRIND_CHECK_READABLE( \ - (volatile unsigned char *)&(__lvalue), \ - (unsigned int)(sizeof (__lvalue))) - - - -/* Mark memory, intended to be on the client's stack, at _qzz_addr as - unaddressible and undefined for _qzz_len bytes. Does not return a - value. The record associated with this setting will be - automatically removed by Valgrind when the containing routine - exits. */ -#define VALGRIND_MAKE_NOACCESS_STACK(_qzz_addr,_qzz_len) \ - {unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \ - VG_USERREQ__MAKE_NOACCESS_STACK, \ - _qzz_addr, _qzz_len, 0, 0); \ - } - - -/* Returns 1 if running on Valgrind, 0 if running on the real CPU. - Currently implemented but untested. 
*/ -#define RUNNING_ON_VALGRIND \ - ({unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0 /* returned if not */, \ - VG_USERREQ__RUNNING_ON_VALGRIND, \ - 0, 0, 0, 0); \ - _qzz_res; \ - }) - - -/* Mark memory, intended to be on the client's stack, at _qzz_addr as - unaddressible and undefined for _qzz_len bytes. Does not return a - value. The record associated with this setting will be - automatically removed by Valgrind when the containing routine - exits. - - Currently implemented but untested. -*/ -#define VALGRIND_DO_LEAK_CHECK \ - {unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \ - VG_USERREQ__DO_LEAK_CHECK, \ - 0, 0, 0, 0); \ - } - - -/* Discard translation of code in the range [_qzz_addr .. _qzz_addr + - _qzz_len - 1]. Useful if you are debugging a JITter or some such, - since it provides a way to make sure valgrind will retranslate the - invalidated area. Returns no value. */ -#define VALGRIND_DISCARD_TRANSLATIONS(_qzz_addr,_qzz_len) \ - {unsigned int _qzz_res; \ - VALGRIND_MAGIC_SEQUENCE(_qzz_res, 0, \ - VG_USERREQ__DISCARD_TRANSLATIONS, \ - _qzz_addr, _qzz_len, 0, 0); \ - } - - -#endif diff --git a/include/vg_profile.c b/include/vg_profile.c deleted file mode 100644 index 34e98d6e70..0000000000 --- a/include/vg_profile.c +++ /dev/null @@ -1,111 +0,0 @@ - -/*--------------------------------------------------------------------*/ -/*--- Profiling machinery -- not for release builds! ---*/ -/*--- vg_profile.c ---*/ -/*--------------------------------------------------------------------*/ - -/* - This file is part of Valgrind, an x86 protected-mode emulator - designed for debugging and profiling binaries on x86-Unixes. - - Copyright (C) 2000-2002 Julian Seward - jseward@acm.org - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of the - License, or (at your option) any later version. 
- - This program is distributed in the hope that it will be useful, but - WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307, USA. - - The GNU General Public License is contained in the file LICENSE. -*/ - -#include "vg_include.h" - -#ifdef VG_PROFILE - -/* get rid of these, if possible */ -#include -#include - -#define VGP_PAIR(enumname,str) str -static const Char* vgp_names[VGP_M_CCS] = { VGP_LIST }; -#undef VGP_PAIR - -static Int vgp_nticks; -static Int vgp_counts[VGP_M_CCS]; -static Int vgp_entries[VGP_M_CCS]; - -static Int vgp_sp; -static VgpCC vgp_stack[VGP_M_STACK]; - -void VGP_(tick) ( int sigNo ) -{ - Int cc; - vgp_nticks++; - cc = vgp_stack[vgp_sp]; - vg_assert(cc >= 0 && cc < VGP_M_CCS); - vgp_counts[ cc ]++; -} - -void VGP_(init_profiling) ( void ) -{ - struct itimerval value; - Int i, ret; - - for (i = 0; i < VGP_M_CCS; i++) - vgp_counts[i] = vgp_entries[i] = 0; - - vgp_nticks = 0; - vgp_sp = -1; - VGP_(pushcc) ( VgpUnc ); - - value.it_interval.tv_sec = 0; - value.it_interval.tv_usec = 10 * 1000; - value.it_value = value.it_interval; - - signal(SIGPROF, VGP_(tick) ); - ret = setitimer(ITIMER_PROF, &value, NULL); - if (ret != 0) VG_(panic)("vgp_init_profiling"); -} - -void VGP_(done_profiling) ( void ) -{ - Int i; - VG_(printf)("Profiling done, %d ticks\n", vgp_nticks); - for (i = 0; i < VGP_M_CCS; i++) - VG_(printf)("%2d: %4d (%3d %%%%) ticks, %8d entries for %s\n", - i, vgp_counts[i], - (Int)(1000.0 * (double)vgp_counts[i] / (double)vgp_nticks), - vgp_entries[i], - vgp_names[i] ); -} - -void VGP_(pushcc) ( VgpCC cc ) -{ - if (vgp_sp >= VGP_M_STACK-1) VG_(panic)("vgp_pushcc"); - vgp_sp++; - vgp_stack[vgp_sp] = cc; - vgp_entries[ cc ] 
++; -} - -void VGP_(popcc) ( void ) -{ - if (vgp_sp <= 0) VG_(panic)("vgp_popcc"); - vgp_sp--; -} - -#endif /* VG_PROFILE */ - -/*--------------------------------------------------------------------*/ -/*--- end vg_profile.c ---*/ -/*--------------------------------------------------------------------*/ diff --git a/lackey/Makefile.am b/lackey/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/lackey/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - 
demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/memcheck/Makefile.am b/memcheck/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/memcheck/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . 
docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) $(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) 
-fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0 diff --git a/memcheck/docs/Makefile.am b/memcheck/docs/Makefile.am deleted file mode 100644 index e8a58fa18e..0000000000 --- a/memcheck/docs/Makefile.am +++ /dev/null @@ -1,5 +0,0 @@ -docdir = $(datadir)/doc/valgrind - -doc_DATA = index.html manual.html nav.html techdocs.html - -EXTRA_DIST = $(doc_DATA) diff --git a/memcheck/docs/index.html b/memcheck/docs/index.html deleted file mode 100644 index 1111702565..0000000000 --- a/memcheck/docs/index.html +++ /dev/null @@ -1,26 +0,0 @@ - - - - - - - - - - - Valgrind's user manual - - - - - - - <body> - <p>This page uses frames, but your browser doesn't support them.</p> - </body> - - - - diff --git a/memcheck/docs/manual.html b/memcheck/docs/manual.html deleted file mode 100644 index b715ee3dfe..0000000000 --- a/memcheck/docs/manual.html +++ /dev/null @@ -1,2702 +0,0 @@ - - - - Valgrind - - - - -  -

Valgrind, version 1.0.0

-
This manual was last updated on 20020726
-

- -

-jseward@acm.org
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -Linux-x86 executables. -

- -

- -


- -

Contents of this manual

- -

Introduction

- 1.1  What Valgrind is for
- 1.2  What it does with your program - -

How to use it, and how to make sense - of the results

- 2.1  Getting started
- 2.2  The commentary
- 2.3  Reporting of errors
- 2.4  Suppressing errors
- 2.5  Command-line flags
- 2.6  Explanation of error messages
- 2.7  Writing suppressions files
- 2.8  The Client Request mechanism
- 2.9  Support for POSIX pthreads
- 2.10  Building and installing
- 2.11  If you have problems
- -

Details of the checking machinery

- 3.1  Valid-value (V) bits
- 3.2  Valid-address (A) bits
- 3.3  Putting it all together
- 3.4  Signals
- 3.5  Memory leak detection
- -

Limitations

- -

How it works -- a rough overview

- 5.1  Getting started
- 5.2  The translation/instrumentation engine
- 5.3  Tracking the status of memory
- 5.4  System calls
- 5.5  Signals
- -

An example

- -

Cache profiling

- -

The design and implementation of Valgrind

- -
- - -

1  Introduction

- - -

1.1  What Valgrind is for

- -Valgrind is a tool to help you find memory-management problems in your -programs. When a program is run under Valgrind's supervision, all -reads and writes of memory are checked, and calls to -malloc/new/free/delete are intercepted. As a result, Valgrind can -detect problems such as: -
    -
  • Use of uninitialised memory
  • -
  • Reading/writing memory after it has been free'd
  • -
  • Reading/writing off the end of malloc'd blocks
  • -
  • Reading/writing inappropriate areas on the stack
  • -
  • Memory leaks -- where pointers to malloc'd blocks are lost - forever
  • -
  • Mismatched use of malloc/new/new [] vs free/delete/delete - []
  • -
  • Some misuses of the POSIX pthreads API
  • -
- -Problems like these can be difficult to find by other means, often -lying undetected for long periods, then causing occasional, -difficult-to-diagnose crashes. - -

-Valgrind is closely tied to details of the CPU, operating system and -to a lesser extent, compiler and basic C libraries. This makes it -difficult to make it portable, so I have chosen at the outset to -concentrate on what I believe to be a widely used platform: Linux on -x86s. Valgrind uses the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. This should cover the vast majority of -modern Linux installations. - - -

-Valgrind is licensed under the GNU General Public License, version -2. Read the file LICENSE in the source distribution for details. Some -of the PThreads test cases, test/pth_*.c, are taken from -"Pthreads Programming" by Bradford Nichols, Dick Buttlar & Jacqueline -Proulx Farrell, ISBN 1-56592-115-1, published by O'Reilly & -Associates, Inc. - - - -

1.2  What it does with your program

- -Valgrind is designed to be as non-intrusive as possible. It works -directly with existing executables. You don't need to recompile, -relink, or otherwise modify, the program to be checked. Simply place -the word valgrind at the start of the command line -normally used to run the program. So, for example, if you want to run -the command ls -l on Valgrind, simply issue the -command: valgrind ls -l. - -

Valgrind takes control of your program before it starts. Debugging -information is read from the executable and associated libraries, so -that error messages can be phrased in terms of source code -locations. Your program is then run on a synthetic x86 CPU which -checks every memory access. All detected errors are written to a -log. When the program finishes, Valgrind searches for and reports on -leaked memory. - -

You can run pretty much any dynamically linked ELF x86 executable -using Valgrind. Programs run 25 to 50 times slower, and take a lot -more memory, than they usually would. It works well enough to run -large programs. For example, the Konqueror web browser from the KDE -Desktop Environment, version 3.0, runs slowly but usably on Valgrind. - -

Valgrind simulates every single instruction your program executes. -Because of this, it finds errors not only in your application but also -in all supporting dynamically-linked (.so-format) -libraries, including the GNU C library, the X client libraries, Qt, if -you work with KDE, and so on. That often includes libraries, for -example the GNU C library, which contain memory access violations, but -which you cannot or do not want to fix. - -

Rather than swamping you with errors in which you are not -interested, Valgrind allows you to selectively suppress errors, by -recording them in a suppressions file which is read when Valgrind -starts up. The build mechanism attempts to select suppressions which -give reasonable behaviour for the libc and XFree86 versions detected -on your machine. - - -

Section 6 shows an example of use. -

-


- - -

2  How to use it, and how to make sense of the results

- - -

2.1  Getting started

- -First off, consider whether it might be beneficial to recompile your -application and supporting libraries with optimisation disabled and -debugging info enabled (the -g flag). You don't have to -do this, but doing so helps Valgrind produce more accurate and less -confusing error reports. Chances are you're set up like this already, -if you intended to debug your program with GNU gdb, or some other -debugger. - -

-A plausible compromise is to use -g -O. -Optimisation levels above -O have been observed, on very -rare occasions, to cause gcc to generate code which fools Valgrind's -error tracking machinery into wrongly reporting uninitialised value -errors. -O gets you the vast majority of the benefits of -higher optimisation levels anyway, so you don't lose much there. - -

-Valgrind understands both the older "stabs" debugging format, used by -gcc versions prior to 3.1, and the newer DWARF2 format used by gcc 3.1 -and later. - -

-Then just run your application, but place the word -valgrind in front of your usual command-line invocation. -Note that you should run the real (machine-code) executable here. If -your application is started by, for example, a shell or perl script, -you'll need to modify it to invoke Valgrind on the real executables. -Running such scripts directly under Valgrind will result in you -getting error reports pertaining to /bin/sh, -/usr/bin/perl, or whatever interpreter you're using. -This almost certainly isn't what you want and can be confusing. - - -

2.2  The commentary

- -Valgrind writes a commentary, detailing error reports and other -significant events. The commentary goes to standard error by -default. This may interfere with your program, so you can ask for it -to be directed elsewhere. - -

All lines in the commentary are of the following form:
-

-  ==12345== some-message-from-Valgrind
-
-

The 12345 is the process ID. This scheme makes it easy -to distinguish program output from Valgrind commentary, and also easy -to differentiate commentaries from different processes which have -become merged together, for whatever reason. - -

By default, Valgrind writes only essential messages to the commentary, -so as to avoid flooding you with information of secondary importance. -If you want more information about what is happening, re-run, passing -the -v flag to Valgrind. - - - -

2.3  Reporting of errors

- -When Valgrind detects something bad happening in the program, an error -message is written to the commentary. For example:
-
-  ==25832== Invalid read of size 4
-  ==25832==    at 0x8048724: BandMatrix::ReSize(int, int, int) (bogon.cpp:45)
-  ==25832==    by 0x80487AF: main (bogon.cpp:66)
-  ==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-  ==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-  ==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-
- -

This message says that the program did an illegal 4-byte read of -address 0xBFFFF74C, which, as far as it can tell, is not a valid stack -address, nor corresponds to any currently malloc'd or free'd blocks. -The read is happening at line 45 of bogon.cpp, called -from line 66 of the same file, etc. For errors associated with an -identified malloc'd/free'd block, for example reading free'd memory, -Valgrind reports not only the location where the error happened, but -also where the associated block was malloc'd/free'd. - -

Valgrind remembers all error reports. When an error is detected, -it is compared against old reports, to see if it is a duplicate. If -so, the error is noted, but no further commentary is emitted. This -avoids you being swamped with bazillions of duplicate error reports. - -

If you want to know how many times each error occurred, run with -the -v option. When execution finishes, all the reports -are printed out, along with, and sorted by, their occurrence counts. -This makes it easy to see which errors have occurred most frequently. - -

Errors are reported before the associated operation actually -happens. For example, if your program decides to read from address -zero, Valgrind will emit a message to this effect, and the program -will then duly die with a segmentation fault. - -

In general, you should try and fix errors in the order that they -are reported. Not doing so can be confusing. For example, a program -which copies uninitialised values to several memory locations, and -later uses them, will generate several error messages. The first such -error message may well give the most direct clue to the root cause of -the problem. - -

The process of detecting duplicate errors is quite an expensive -one and can become a significant performance overhead if your program -generates huge quantities of errors. To avoid serious problems here, -Valgrind will simply stop collecting errors after 300 different errors -have been seen, or 30000 errors in total have been seen. In this -situation you might as well stop your program and fix it, because -Valgrind won't tell you anything else useful after this. Note that -the 300/30000 limits apply after suppressed errors are removed. These -limits are defined in vg_include.h and can be increased -if necessary. - -

To avoid this cutoff you can use the ---error-limit=no flag. Then valgrind will always show -errors, regardless of how many there are. Use this flag carefully, -since it may have a dire effect on performance. - - - -

2.4  Suppressing errors

- -Valgrind detects numerous problems in the base libraries, such as the -GNU C library, and the XFree86 client libraries, which come -pre-installed on your GNU/Linux system. You can't easily fix these, -but you don't want to see these errors (and yes, there are many!) So -Valgrind reads a list of errors to suppress at startup. -A default suppression file is cooked up by the -./configure script. - -

You can modify and add to the suppressions file at your leisure, -or, better, write your own. Multiple suppression files are allowed. -This is useful if part of your project contains errors you can't or -don't want to fix, yet you don't want to continuously be reminded of -them. - -

Each error to be suppressed is described very specifically, to -minimise the possibility that a suppression-directive inadvertently -suppresses a bunch of similar errors which you did want to see. The -suppression mechanism is designed to allow precise yet flexible -specification of errors to suppress. - -

If you use the -v flag, at the end of execution, Valgrind -prints out one line for each used suppression, giving its name and the -number of times it got used. Here's the suppressions used by a run of -ls -l: -

-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getgrgid_r
-  --27579-- supp: 1 socketcall.connect(serv_addr)/__libc_connect/__nscd_getpwuid_r
-  --27579-- supp: 6 strrchr/_dl_map_object_from_fd/_dl_map_object
-
- - -

2.5  Command-line flags

- -You invoke Valgrind like this: -
-  valgrind [options-for-Valgrind] your-prog [options for your-prog]
-
- -

Note that Valgrind also reads options from the environment variable -$VALGRIND, and processes them before the command-line -options. - -

Valgrind's default settings succeed in giving reasonable behaviour -in most cases. Available options, in no particular order, are as -follows: -

    -
  • --help

  • - -
  • --version
    -

    The usual deal.


  • - -

  • -v --verbose
    -

    Be more verbose. Gives extra information on various aspects - of your program, such as: the shared objects loaded, the - suppressions used, the progress of the instrumentation engine, - and warnings about unusual behaviour. -


  • - -

  • -q --quiet
    -

    Run silently, and only print error messages. Useful if you - are running regression tests or have some other automated test - machinery. -


  • - -

  • --demangle=no
    - --demangle=yes [the default] -

    Disable/enable automatic demangling (decoding) of C++ names. - Enabled by default. When enabled, Valgrind will attempt to - translate encoded C++ procedure names back to something - approaching the original. The demangler handles symbols mangled - by g++ versions 2.X and 3.X. - -

    An important fact about demangling is that function - names mentioned in suppressions files should be in their mangled - form. Valgrind does not demangle function names when searching - for applicable suppressions, because to do otherwise would make - suppressions file contents dependent on the state of Valgrind's - demangling machinery, and would also be slow and pointless. -


  • - -

  • --num-callers=<number> [default=4]
    -

    By default, Valgrind shows four levels of function call names - to help you identify program locations. You can change that - number with this option. This can help in determining the - program's location in deeply-nested call chains. Note that errors - are commoned up using only the top three function locations (the - place in the current function, and that of its two immediate - callers). So this doesn't affect the total number of errors - reported. -

    - The maximum value for this is 50. Note that higher settings - will make Valgrind run a bit more slowly and take a bit more - memory, but can be useful when working with programs with - deeply-nested call chains. -


  • - -

  • --gdb-attach=no [the default]
    - --gdb-attach=yes -

    When enabled, Valgrind will pause after every error shown, - and print the line -
    - ---- Attach to GDB ? --- [Return/N/n/Y/y/C/c] ---- -

    - Pressing Ret, or N Ret - or n Ret, causes Valgrind not to - start GDB for this error. -

    - Y Ret - or y Ret causes Valgrind to - start GDB, for the program at this point. When you have - finished with GDB, quit from it, and the program will continue. - Trying to continue from inside GDB doesn't work. -

    - C Ret - or c Ret causes Valgrind not to - start GDB, and not to ask again. -

    - --gdb-attach=yes conflicts with - --trace-children=yes. You can't use them together. - Valgrind refuses to start up in this situation. 1 May 2002: - this is a historical relic which could be easily fixed if it - gets in your way. Mail me and complain if this is a problem for - you.


  • - -

  • --partial-loads-ok=yes [the default]
    - --partial-loads-ok=no -

    Controls how Valgrind handles word (4-byte) loads from - addresses for which some bytes are addressable and others - are not. When yes (the default), such loads - do not elicit an address error. Instead, the loaded V bytes - corresponding to the illegal addresses indicate undefined, and - those corresponding to legal addresses are loaded from shadow - memory, as usual. -

    - When no, loads from partially - invalid addresses are treated the same as loads from completely - invalid addresses: an illegal-address error is issued, - and the resulting V bytes indicate valid data. -


  • - -

  • --sloppy-malloc=no [the default]
    - --sloppy-malloc=yes -

    When enabled, all requests for malloc/calloc are rounded up - to a whole number of machine words -- in other words, made - divisible by 4. For example, a request for 17 bytes of space - would result in a 20-byte area being made available. This works - around bugs in sloppy libraries which assume that they can - safely rely on malloc/calloc requests being rounded up in this - fashion. Without the workaround, these libraries tend to - generate large numbers of errors when they access the ends of - these areas. -

    - Valgrind snapshots dated 17 Feb 2002 and later are - cleverer about this problem, and you should no longer need to - use this flag. To put it bluntly, if you do need to use this - flag, your program violates the ANSI C semantics defined for - malloc and free, even if it appears to - work correctly, and you should fix it, at least if you hope for - maximum portability. -


  • - -

  • --alignment=<number> [default: 4]

    By - default valgrind's malloc, realloc, - etc, return 4-byte aligned addresses. These are suitable for - any accesses on x86 processors. - Some programs might however assume that malloc et - al return memory aligned to 8 bytes or more. - These programs are broken and should be fixed, but - if this is impossible for whatever reason the alignment can be - increased using this parameter. The supplied value must be - between 4 and 4096 inclusive, and must be a power of two.


  • - -

  • --trace-children=no [the default]
    - --trace-children=yes -

    When enabled, Valgrind will trace into child processes. This - is confusing and usually not what you want, so is disabled by - default. As of 1 May 2002, tracing into a child process from a - parent which uses libpthread.so is probably broken - and is likely to cause breakage. Please report any such - problems to me.


  • - -

  • --freelist-vol=<number> [default: 1000000] -

    When the client program releases memory using free (in C) or - delete (C++), that memory is not immediately made available for - re-allocation. Instead it is marked inaccessible and placed in - a queue of freed blocks. The purpose is to delay the point at - which freed-up memory comes back into circulation. This - increases the chance that Valgrind will be able to detect - invalid accesses to blocks for some significant period of time - after they have been freed. -

    - This flag specifies the maximum total size, in bytes, of the - blocks in the queue. The default value is one million bytes. - Increasing this increases the total amount of memory used by - Valgrind but may detect invalid uses of freed blocks which would - otherwise go undetected.


  • - -

  • --logfile-fd=<number> [default: 2, stderr] -

    Specifies the file descriptor on which Valgrind communicates - all of its messages. The default, 2, is the standard error - channel. This may interfere with the client's own use of - stderr. To dump Valgrind's commentary in a file without using - stderr, something like the following works well (sh/bash - syntax):
    -    - valgrind --logfile-fd=9 my_prog 9> logfile
    - That is: tell Valgrind to send all output to file descriptor 9, - and ask the shell to route file descriptor 9 to "logfile". -


  • - -

  • --suppressions=<filename> - [default: $PREFIX/lib/valgrind/default.supp] -

    Specifies an extra - file from which to read descriptions of errors to suppress. You - may use as many extra suppressions files as you - like.


  • - -

  • --leak-check=no [default]
    - --leak-check=yes -

    When enabled, search for memory leaks when the client program - finishes. A memory leak means a malloc'd block, which has not - yet been free'd, but to which no pointer can be found. Such a - block can never be free'd by the program, since no pointer to it - exists. Leak checking is disabled by default because it tends - to generate dozens of error messages.


  • - -

  • --show-reachable=no [default]
    - --show-reachable=yes -

    When disabled, the memory leak detector only shows blocks for - which it cannot find a pointer to at all, or it can only find a - pointer to the middle of. These blocks are prime candidates for - memory leaks. When enabled, the leak detector also reports on - blocks which it could find a pointer to. Your program could, at - least in principle, have freed such blocks before exit. - Contrast this to blocks for which no pointer, or only an - interior pointer could be found: they are more likely to - indicate memory leaks, because you do not actually have a - pointer to the start of the block which you can hand to - free, even if you wanted to.


  • - -

  • --leak-resolution=low [default]
    - --leak-resolution=med
    - --leak-resolution=high -

    When doing leak checking, determines how willing Valgrind is - to consider different backtraces to be the same. When set to - low, the default, only the first two entries need - match. When med, four entries have to match. When - high, all entries need to match. -

    - For hardcore leak debugging, you probably want to use - --leak-resolution=high together with - --num-callers=40 or some such large number. Note - however that this can give an overwhelming amount of - information, which is why the defaults are 4 callers and - low-resolution matching. -

    - Note that the --leak-resolution= setting does not - affect Valgrind's ability to find leaks. It only changes how - the results are presented. -


  • - -

  • --workaround-gcc296-bugs=no [default]
    - --workaround-gcc296-bugs=yes

    When enabled, - assumes that reads and writes some small distance below the stack - pointer %esp are due to bugs in gcc 2.96, and does - not report them. The "small distance" is 256 bytes by default. - Note that gcc 2.96 is the default compiler on some popular Linux - distributions (RedHat 7.X, Mandrake) and so you may well need to - use this flag. Do not use it if you do not have to, as it can - cause real errors to be overlooked. Another option is to use a - gcc/g++ which does not generate accesses below the stack - pointer. 2.95.3 seems to be a good choice in this respect. -

    - Unfortunately (27 Feb 02) it looks like g++ 3.0.4 has a similar - bug, so you may need to issue this flag if you use 3.0.4. A - while later (early Apr 02) this is confirmed as a scheduling bug - in g++-3.0.4. -


  • - -

  • --error-limit=yes [default]
    - --error-limit=no

    When enabled, valgrind stops - reporting errors after 30000 in total, or 300 different ones, - have been seen. This is to stop the error tracking machinery - from becoming a huge performance overhead in programs with many - errors.


  • - -

  • --cachesim=no [default]
    - --cachesim=yes

    When enabled, turns off memory - checking, and turns on cache profiling. Cache profiling is - described in detail in Section 7. -


  • - -

  • --weird-hacks=hack1,hack2,... - Pass miscellaneous hints to Valgrind which slightly modify the - simulated behaviour in nonstandard or dangerous ways, possibly - to help the simulation of strange features. By default no hacks - are enabled. Use with caution! Currently known hacks are: -

    -

      -
    • ioctl-VTIME Use this if you have a program - which sets readable file descriptors to have a timeout by - doing ioctl on them with a - TCSETA-style command and a non-zero - VTIME timeout value. This is considered - potentially dangerous and therefore is not engaged by - default, because it is (remotely) conceivable that it could - cause threads doing read to incorrectly block - the entire process. -

      - You probably want to try this one if you have a program - which unexpectedly blocks in a read from a file - descriptor which you know to have been messed with by - ioctl. This could happen, for example, if the - descriptor is used to read input from some kind of screen - handling library. -

      - To find out if your program is blocking unexpectedly in the - read system call, run with the - --trace-syscalls=yes flag. -

      -

    • truncate-writes Use this if you have a threaded - program which appears to unexpectedly block whilst writing - into a pipe. The effect is to modify all calls to - write() so that requests to write more than - 4096 bytes are treated as if they only requested a write of - 4096 bytes. Valgrind does this by changing the - count argument of write(), as - passed to the kernel, so that it is at most 4096. The - amount of data written will then be less than the client - program asked for, but the client should have a loop around - its write() call to check whether the requested - number of bytes have been written. If not, it should issue - further write() calls until all the data is - written. -

      - This all sounds pretty dodgy to me, which is why I've made - this behaviour only happen on request. It is not the - default behaviour. At the time of writing this (30 June - 2002) I have only seen one example where this is necessary, - so either the problem is extremely rare or nobody is using - Valgrind :-) -

      - On experimentation I see that truncate-writes - doesn't interact well with ioctl-VTIME, so you - probably don't want to try both at once. -

      - As above, to find out if your program is blocking - unexpectedly in the write() system call, you - may find the --trace-syscalls=yes - --trace-sched=yes flags useful. -

    - -
  • -

- -There are also some options for debugging Valgrind itself. You -shouldn't need to use them in the normal run of things. Nevertheless: - -
    - -
  • --single-step=no [default]
    - --single-step=yes -

    When enabled, each x86 insn is translated separately into - instrumented code. When disabled, translation is done on a - per-basic-block basis, giving much better translations.


  • -

    - -

  • --optimise=no
    - --optimise=yes [default] -

    When enabled, various improvements are applied to the - intermediate code, mainly aimed at allowing the simulated CPU's - registers to be cached in the real CPU's registers over several - simulated instructions.


  • -

    - -

  • --instrument=no
    - --instrument=yes [default] -

    When disabled, the translations don't actually contain any - instrumentation.


  • -

    - -

  • --cleanup=no
    - --cleanup=yes [default] -

    When enabled, various improvements are applied to the - post-instrumented intermediate code, aimed at removing redundant - value checks.


  • -

    - -

  • --trace-syscalls=no [default]
    - --trace-syscalls=yes -

    Enable/disable tracing of system call intercepts.


  • -

    - -

  • --trace-signals=no [default]
    - --trace-signals=yes -

    Enable/disable tracing of signal handling.


  • -

    - -

  • --trace-sched=no [default]
    - --trace-sched=yes -

    Enable/disable tracing of thread scheduling events.


  • -

    - -

  • --trace-pthread=none [default]
    - --trace-pthread=some
    - --trace-pthread=all -

    Specifies amount of trace detail for pthread-related events.


  • -

    - -

  • --trace-symtab=no [default]
    - --trace-symtab=yes -

    Enable/disable tracing of symbol table reading.


  • -

    - -

  • --trace-malloc=no [default]
    - --trace-malloc=yes -

    Enable/disable tracing of malloc/free (et al) intercepts. -


  • -

    - -

  • --stop-after=<number> - [default: infinity, more or less] -

    After <number> basic blocks have been executed, shut down - Valgrind and switch back to running the client on the real CPU. -


  • -

    - -

  • --dump-error=<number> [default: inactive] -

    After the program has exited, show gory details of the - translation of the basic block containing the <number>'th - error context. When used with --single-step=yes, - can show the exact x86 instruction causing an error. This is - all fairly dodgy and doesn't work at all if threads are - involved.


  • -

    -

- - - -

2.6  Explanation of error messages

- -Despite considerable sophistication under the hood, Valgrind can only -really detect two kinds of errors, use of illegal addresses, and use -of undefined values. Nevertheless, this is enough to help you -discover all sorts of memory-management nasties in your code. This -section presents a quick summary of what error messages mean. The -precise behaviour of the error-checking machinery is described in -Section 3. - - -

2.6.1  Illegal read / Illegal write errors

-For example: -
-  Invalid read of size 4
-     at 0x40F6BBCC: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40F6B804: (within /usr/lib/libpng.so.2.1.0.9)
-     by 0x40B07FF4: read_png_image__FP8QImageIO (kernel/qpngio.cpp:326)
-     by 0x40AC751B: QImageIO::read() (kernel/qimage.cpp:3621)
-     Address 0xBFFFF0E0 is not stack'd, malloc'd or free'd
-
- -

This happens when your program reads or writes memory at a place -which Valgrind reckons it shouldn't. In this example, the program did -a 4-byte read at address 0xBFFFF0E0, somewhere within the -system-supplied library libpng.so.2.1.0.9, which was called from -somewhere else in the same library, called from line 326 of -qpngio.cpp, and so on. - -

Valgrind tries to establish what the illegal address might relate -to, since that's often useful. So, if it points into a block of -memory which has already been freed, you'll be informed of this, and -also where the block was free'd at. Likewise, if it should turn out -to be just off the end of a malloc'd block, a common result of -off-by-one-errors in array subscripting, you'll be informed of this -fact, and also where the block was malloc'd. - -

In this example, Valgrind can't identify the address. Actually the -address is on the stack, but, for some reason, this is not a valid -stack address -- it is below the stack pointer, %esp, and that isn't -allowed. In this particular case it's probably caused by gcc -generating invalid code, a known bug in various flavours of gcc. - -

Note that Valgrind only tells you that your program is about to -access memory at an illegal address. It can't stop the access from -happening. So, if your program makes an access which normally would -result in a segmentation fault, your program will still suffer the same -fate -- but you will get a message from Valgrind immediately prior to -this. In this particular example, reading junk on the stack is -non-fatal, and the program stays alive. - - -

2.6.2  Use of uninitialised values

-For example: -
-  Conditional jump or move depends on uninitialised value(s)
-     at 0x402DFA94: _IO_vfprintf (_itoa.h:49)
-     by 0x402E8476: _IO_printf (printf.c:36)
-     by 0x8048472: main (tests/manuel1.c:8)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-
- -

An uninitialised-value use error is reported when your program uses -a value which hasn't been initialised -- in other words, is undefined. -Here, the undefined value is used somewhere inside the printf() -machinery of the C library. This error was reported when running the -following small program: -

-  int main()
-  {
-    int x;
-    printf ("x = %d\n", x);
-  }
-
- -

It is important to understand that your program can copy around -junk (uninitialised) data to its heart's content. Valgrind observes -this and keeps track of the data, but does not complain. A complaint -is issued only when your program attempts to make use of uninitialised -data. In this example, x is uninitialised. Valgrind observes the -value being passed to _IO_printf and thence to _IO_vfprintf, but makes -no comment. However, _IO_vfprintf has to examine the value of x so it -can turn it into the corresponding ASCII string, and it is at this -point that Valgrind complains. - -

Sources of uninitialised data tend to be: -

    -
  • Local variables in procedures which have not been initialised, - as in the example above.

  • - -

  • The contents of malloc'd blocks, before you write something - there. In C++, the new operator is a wrapper round malloc, so - if you create an object with new, its fields will be - uninitialised until you fill them in, which is only Right and - Proper.
  • -
- - - -

2.6.3  Illegal frees

-For example: -
-  Invalid free()
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-     Address 0x3807F7B4 is 0 bytes inside a block of size 177 free'd
-     at 0x4004FFDF: free (ut_clientmalloc.c:577)
-     by 0x80484C7: main (tests/doublefree.c:10)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/doublefree)
-
-

Valgrind keeps track of the blocks allocated by your program with -malloc/new, so it can know exactly whether or not the argument to -free/delete is legitimate. Here, this test program has -freed the same block twice. As with the illegal read/write errors, -Valgrind attempts to make sense of the address free'd. If, as -here, the address is one which has previously been freed, you will -be told that -- making duplicate frees of the same block easy to spot. - - -

2.6.4  When a block is freed with an inappropriate -deallocation function

-In the following example, a block allocated with new[] -has wrongly been deallocated with free: -
-  Mismatched free() / delete / delete []
-     at 0x40043249: free (vg_clientfuncs.c:171)
-     by 0x4102BB4E: QGArray::~QGArray(void) (tools/qgarray.cpp:149)
-     by 0x4C261C41: PptDoc::~PptDoc(void) (include/qmemarray.h:60)
-     by 0x4C261F0E: PptXml::~PptXml(void) (pptxml.cc:44)
-     Address 0x4BB292A8 is 0 bytes inside a block of size 64 alloc'd
-     at 0x4004318C: __builtin_vec_new (vg_clientfuncs.c:152)
-     by 0x4C21BC15: KLaola::readSBStream(int) const (klaola.cc:314)
-     by 0x4C21C155: KLaola::stream(KLaola::OLENode const *) (klaola.cc:416)
-     by 0x4C21788F: OLEFilter::convert(QCString const &) (olefilter.cc:272)
-
-The following was told to me by the KDE 3 developers. I didn't know -any of it myself. They also implemented the check itself. -

-In C++ it's important to deallocate memory in a way compatible with -how it was allocated. The deal is: -

    -
  • If allocated with malloc, calloc, - realloc, valloc or - memalign, you must deallocate with free. -
  • If allocated with new[], you must deallocate with - delete[]. -
  • If allocated with new, you must deallocate with - delete. -
-The worst thing is that on Linux apparently it doesn't matter if you -do muddle these up, and it all seems to work ok, but the same program -may then crash on a different platform, Solaris for example. So it's -best to fix it properly. According to the KDE folks "it's amazing how -many C++ programmers don't know this". -

-Pascal Massimino adds the following clarification: -delete[] must be called associated with a -new[] because the compiler stores the size of the array -and the pointer-to-member to the destructor of the array's content -just before the pointer actually returned. This implies a -variable-sized overhead in what's returned by new or -new[]. It is rather surprising how compilers [Ed: -runtime-support libraries?] are robust to mismatch in -new/delete -new[]/delete[]. - - -

2.6.5  Passing system call parameters with inadequate -read/write permissions

- -Valgrind checks all parameters to system calls. If a system call -needs to read from a buffer provided by your program, Valgrind checks -that the entire buffer is addressible and has valid data, ie, it is -readable. And if the system call needs to write to a user-supplied -buffer, Valgrind checks that the buffer is addressible. After the -system call, Valgrind updates its administrative information to -precisely reflect any changes in memory permissions caused by the -system call. - -

Here's an example of a system call with an invalid parameter: -

-  #include <stdlib.h>
-  #include <unistd.h>
-  int main( void )
-  {
-    char* arr = malloc(10);
-    (void) write( 1 /* stdout */, arr, 10 );
-    return 0;
-  }
-
- -

You get this complaint ... -

-  Syscall param write(buf) contains uninitialised or unaddressable byte(s)
-     at 0x4035E072: __libc_write
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-     by <bogus frame pointer> ???
-     Address 0x3807E6D0 is 0 bytes inside a block of size 10 alloc'd
-     at 0x4004FEE6: malloc (ut_clientmalloc.c:539)
-     by 0x80484A0: main (tests/badwrite.c:6)
-     by 0x402A6E5E: __libc_start_main (libc-start.c:129)
-     by 0x80483B1: (within tests/badwrite)
-
- -

... because the program has tried to write uninitialised junk from -the malloc'd block to the standard output. - - -

2.6.6  Warning messages you might see

- -Most of these only appear if you run in verbose mode (enabled by --v): -
    -
  • More than 50 errors detected. Subsequent errors - will still be recorded, but in less detail than before. -
    - After 50 different errors have been shown, Valgrind becomes - more conservative about collecting them. It then requires only - the program counters in the top two stack frames to match when - deciding whether or not two errors are really the same one. - Prior to this point, the PCs in the top four frames are required - to match. This hack has the effect of slowing down the - appearance of new errors after the first 50. The 50 constant can - be changed by recompiling Valgrind. -

    -

  • More than 300 errors detected. I'm not reporting any more. - Final error counts may be inaccurate. Go fix your - program! -
    - After 300 different errors have been detected, Valgrind ignores - any more. It seems unlikely that collecting even more different - ones would be of practical help to anybody, and it avoids the - danger that Valgrind spends more and more of its time comparing - new errors against an ever-growing collection. As above, the 300 - number is a compile-time constant. -

    -

  • Warning: client switching stacks? -
    - Valgrind spotted such a large change in the stack pointer, %esp, - that it guesses the client is switching to a different stack. - At this point it makes a kludgey guess where the base of the new - stack is, and sets memory permissions accordingly. You may get - many bogus error messages following this, if Valgrind guesses - wrong. At the moment "large change" is defined as a change of - more than 2000000 in the value of the %esp (stack pointer) - register. -

    -

  • Warning: client attempted to close Valgrind's logfile fd <number> - -
    - Valgrind doesn't allow the client - to close the logfile, because you'd never see any diagnostic - information after that point. If you see this message, - you may want to use the --logfile-fd=<number> - option to specify a different logfile file-descriptor number. -

    -

  • Warning: noted but unhandled ioctl <number> -
    - Valgrind observed a call to one of the vast family of - ioctl system calls, but did not modify its - memory status info (because I have not yet got round to it). - The call will still have gone through, but you may get spurious - errors after this as a result of the non-update of the memory info. -

    -

  • Warning: set address range perms: large range <number> -
    - Diagnostic message, mostly for my benefit, to do with memory - permissions. -
- - - -

2.7  Writing suppressions files

- -A suppression file describes a bunch of errors which, for one reason -or another, you don't want Valgrind to tell you about. Usually the -reason is that the system libraries are buggy but unfixable, at least -within the scope of the current debugging session. Multiple -suppressions files are allowed. By default, Valgrind uses -$PREFIX/lib/valgrind/default.supp. - -

-You can ask to add suppressions from another file, by specifying ---suppressions=/path/to/file.supp. - -

Each suppression has the following components:
-

    - -
  • Its name. This merely gives a handy name to the suppression, by - which it is referred to in the summary of used suppressions - printed out when a program finishes. It's not important what - the name is; any identifying string will do. -

    - -

  • The nature of the error to suppress. Either: - Value1, - Value2, - Value4 or - Value8, - meaning an uninitialised-value error when - using a value of 1, 2, 4 or 8 bytes. - Or - Cond (or its old name, Value0), - meaning use of an uninitialised CPU condition code. Or: - Addr1, - Addr2, - Addr4 or - Addr8, meaning an invalid address during a - memory access of 1, 2, 4 or 8 bytes respectively. Or - Param, - meaning an invalid system call parameter error. Or - Free, meaning an invalid or mismatching free. - Or PThread, meaning any kind of complaint to do - with the PThreads API.

  • -

    - -

  • The "immediate location" specification. For Value and Addr - errors, is either the name of the function in which the error - occurred, or, failing that, the full path to the .so file - containing the error location. For Param errors, is the name of - the offending system call parameter. For Free errors, is the - name of the function doing the freeing (eg, free, - __builtin_vec_delete, etc)

  • -

    - -

  • The caller of the above "immediate location". Again, either a - function or shared-object name.

  • -

    - -

  • Optionally, one or two extra calling-function or object names, - for greater precision.
  • -
- -

-Locations may be either names of shared objects or wildcards matching -function names. They begin obj: and fun: -respectively. Function and object names to match against may use the -wildcard characters * and ?. - -A suppression only suppresses an error when the error matches all the -details in the suppression. Here's an example: -

-  {
-    __gconv_transform_ascii_internal/__mbrtowc/mbtowc
-    Value4
-    fun:__gconv_transform_ascii_internal
-    fun:__mbr*toc
-    fun:mbtowc
-  }
-
- -

What it means is: suppress a use-of-uninitialised-value error, when -the data size is 4, when it occurs in the function -__gconv_transform_ascii_internal, when that is called -from any function of name matching __mbr*toc, -when that is called from -mbtowc. It doesn't apply under any other circumstances. -The string by which this suppression is identified to the user is -__gconv_transform_ascii_internal/__mbrtowc/mbtowc. - -

Another example: -

-  {
-    libX11.so.6.2/libX11.so.6.2/libXaw.so.7.0
-    Value4
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libX11.so.6.2
-    obj:/usr/X11R6/lib/libXaw.so.7.0
-  }
-
- -

Suppress any size 4 uninitialised-value error which occurs anywhere -in libX11.so.6.2, when called from anywhere in the same -library, when called from anywhere in libXaw.so.7.0. The -inexact specification of locations is regrettable, but is about all -you can hope for, given that the X11 libraries shipped with Red Hat -7.2 have had their symbol tables removed. - -

Note -- since the above two examples did not make it clear -- that -you can freely mix the obj: and fun: -styles of description within a single suppression record. - - - -

2.8  The Client Request mechanism

- -Valgrind has a trapdoor mechanism via which the client program can -pass all manner of requests and queries to Valgrind. Internally, this -is used extensively to make malloc, free, signals, threads, etc, work, -although you don't see that. -

-For your convenience, a subset of these so-called client requests is -provided to allow you to tell Valgrind facts about the behaviour of -your program, and conversely to make queries. In particular, your -program can tell Valgrind about changes in memory range permissions -that Valgrind would not otherwise know about, and so allows clients to -get Valgrind to do arbitrary custom checks. -

-Clients need to include the header file valgrind.h to -make this work. The macros therein have the magical property that -they generate code in-line which Valgrind can spot. However, the code -does nothing when not run on Valgrind, so you are not forced to run -your program on Valgrind just because you use the macros in this file. -Also, you are not required to link your program with any extra -supporting libraries. -

-A brief description of the available macros: -

    -
  • VALGRIND_MAKE_NOACCESS, - VALGRIND_MAKE_WRITABLE and - VALGRIND_MAKE_READABLE. These mark address - ranges as completely inaccessible, accessible but containing - undefined data, and accessible and containing defined data, - respectively. Subsequent errors may have their faulting - addresses described in terms of these blocks. Returns a - "block handle". Returns zero when not run on Valgrind. -

    -

  • VALGRIND_DISCARD: At some point you may want - Valgrind to stop reporting errors in terms of the blocks - defined by the previous three macros. To do this, the above - macros return a small-integer "block handle". You can pass - this block handle to VALGRIND_DISCARD. After - doing so, Valgrind will no longer be able to relate - addressing errors to the user-defined block associated with - the handle. The permissions settings associated with the - handle remain in place; this just affects how errors are - reported, not whether they are reported. Returns 1 for an - invalid handle and 0 for a valid handle (although passing - invalid handles is harmless). Always returns 0 when not run - on Valgrind. -

    -

  • VALGRIND_CHECK_NOACCESS, - VALGRIND_CHECK_WRITABLE and - VALGRIND_CHECK_READABLE: check immediately - whether or not the given address range has the relevant - property, and if not, print an error message. Also, for the - convenience of the client, returns zero if the relevant - property holds; otherwise, the returned value is the address - of the first byte for which the property is not true. - Always returns 0 when not run on Valgrind. -

    -

  • VALGRIND_CHECK_DEFINED: a quick and easy way - to find out whether Valgrind thinks a particular variable - (lvalue, to be precise) is addressible and defined. Prints - an error message if not. Returns no value. -

    -

  • VALGRIND_MAKE_NOACCESS_STACK: a highly - experimental feature. Similarly to - VALGRIND_MAKE_NOACCESS, this marks an address - range as inaccessible, so that subsequent accesses to an - address in the range give an error. However, this macro - does not return a block handle. Instead, all annotations - created like this are reviewed at each client - ret (subroutine return) instruction, and those - which now define an address range below the client's stack - pointer register (%esp) are automatically - deleted. -

    - In other words, this macro allows the client to tell - Valgrind about red-zones on its own stack. Valgrind - automatically discards this information when the stack - retreats past such blocks. Beware: hacky and flaky, and - probably interacts badly with the new pthread support. -

    -

  • RUNNING_ON_VALGRIND: returns 1 if running on - Valgrind, 0 if running on the real CPU. -

    -

  • VALGRIND_DO_LEAK_CHECK: run the memory leak detector - right now. Returns no value. I guess this could be used to - incrementally check for leaks between arbitrary places in the - program's execution. Warning: not properly tested! -

    -

  • VALGRIND_DISCARD_TRANSLATIONS: discard translations - of code in the specified address range. Useful if you are - debugging a JITter or some other dynamic code generation system. - After this call, attempts to execute code in the invalidated - address range will cause valgrind to make new translations of that - code, which is probably the semantics you want. Note that this is - implemented naively, and involves checking all 200191 entries in - the translation table to see if any of them overlap the specified - address range. So try not to call it often, or performance will - nosedive. Note that you can be clever about this: you only need - to call it when an area which previously contained code is - overwritten with new code. You can choose to write code into - fresh memory, and just call this occasionally to discard large - chunks of old code all at once. -

    - Warning: minimally tested, especially for the cache simulator. -

-

- - - -

2.9  Support for POSIX Pthreads

- -As of late April 02, Valgrind supports programs which use POSIX -pthreads. Doing this has proved technically challenging but is now -mostly complete. It works well enough for significant threaded -applications to work. -

-It works as follows: threaded apps are (dynamically) linked against -libpthread.so. Usually this is the one installed with -your Linux distribution. Valgrind, however, supplies its own -libpthread.so and automatically connects your program to -it instead. -

-The fake libpthread.so and Valgrind cooperate to -implement a user-space pthreads package. This approach avoids the -horrible implementation problems of implementing a truly -multiprocessor version of Valgrind, but it does mean that threaded -apps run only on one CPU, even if you have a multiprocessor machine. -

-Valgrind schedules your threads in a round-robin fashion, with all -threads having equal priority. It switches threads every 50000 basic -blocks (typically around 300000 x86 instructions), which means you'll -get a much finer interleaving of thread executions than when run -natively. This in itself may cause your program to behave differently -if you have some kind of concurrency, critical race, locking, or -similar, bugs. -

-The current (valgrind-1.0 release) state of pthread support is as -follows: -

    -
  • Mutexes, condition variables, thread-specific data, - pthread_once, reader-writer locks, semaphores, - cleanup stacks, cancellation and thread detaching currently work. - Various attribute-like calls are handled but ignored; you get a - warning message. -

    -

  • Currently the following syscalls are thread-safe (nonblocking): - write read nanosleep - sleep select poll - recvmsg and - accept. -

    -

  • Signals in pthreads are now handled properly(ish): - pthread_sigmask, pthread_kill, - sigwait and raise are now implemented. - Each thread has its own signal mask, as POSIX requires. - It's a bit kludgey -- there's a system-wide pending signal set, - rather than one for each thread. But hey. -
- - -As of 18 May 02, the following threaded programs now work fine on my -RedHat 7.2 box: Opera 6.0Beta2, KNode in KDE 3.0, Mozilla-0.9.2.1 and -Galeon-0.11.3, both as supplied with RedHat 7.2. Also Mozilla 1.0RC2. -OpenOffice 1.0. MySQL 3.something (the current stable release). - - -

2.10  Building and installing

- -We now use the standard Unix ./configure, -make, make install mechanism, and I have -attempted to ensure that it works on machines with kernel 2.2 or 2.4 -and glibc 2.1.X or 2.2.X. I don't think there is much else to say. -There are no options apart from the usual --prefix that -you should give to ./configure. - -

-The configure script tests the version of the X server -currently indicated by the current $DISPLAY. This is a -known bug. The intention was to detect the version of the current -XFree86 client libraries, so that correct suppressions could be -selected for them, but instead the test checks the server version. -This is just plain wrong. - -

-If you are building a binary package of Valgrind for distribution, -please read README_PACKAGERS. It contains some important -information. - -

-Apart from that there is no excitement here. Let me know if you have -build problems. - - - - -

2.11  If you have problems

-Mail me (jseward@acm.org). - -

See Section 4 for the known limitations of -Valgrind, and for a list of programs which are known not to work on -it. - -

The translator/instrumentor has a lot of assertions in it. They -are permanently enabled, and I have no plans to disable them. If one -of these breaks, please mail me! - -

If you get an assertion failure on the expression -chunkSane(ch) in vg_free() in -vg_malloc.c, this may have happened because your program -wrote off the end of a malloc'd block, or before its beginning. -Valgrind should have emitted a proper message to that effect before -dying in this way. This is a known problem which I should fix. -

- -


- - -

3  Details of the checking machinery

- -Read this section if you want to know, in detail, exactly what and how -Valgrind is checking. - - -

3.1  Valid-value (V) bits

- -It is simplest to think of Valgrind implementing a synthetic Intel x86 -CPU which is identical to a real CPU, except for one crucial detail. -Every bit (literally) of data processed, stored and handled by the -real CPU has, in the synthetic CPU, an associated "valid-value" bit, -which says whether or not the accompanying bit has a legitimate value. -In the discussions which follow, this bit is referred to as the V -(valid-value) bit. - -

Each byte in the system therefore has 8 V bits which follow -it wherever it goes. For example, when the CPU loads a word-size item -(4 bytes) from memory, it also loads the corresponding 32 V bits from -a bitmap which stores the V bits for the process' entire address -space. If the CPU should later write the whole or some part of that -value to memory at a different address, the relevant V bits will be -stored back in the V-bit bitmap. - -

In short, each bit in the system has an associated V bit, which -follows it around everywhere, even inside the CPU. Yes, the CPU's -(integer and %eflags) registers have their own V bit -vectors. - -

Copying values around does not cause Valgrind to check for, or -report on, errors. However, when a value is used in a way which might -conceivably affect the outcome of your program's computation, the -associated V bits are immediately checked. If any of these indicate -that the value is undefined, an error is reported. - -

Here's an (admittedly nonsensical) example: -

-  int i, j;
-  int a[10], b[10];
-  for (i = 0; i < 10; i++) {
-    j = a[i];
-    b[i] = j;
-  }
-
- -

Valgrind emits no complaints about this, since it merely copies -uninitialised values from a[] into b[], and -doesn't use them in any way. However, if the loop is changed to -

-  for (i = 0; i < 10; i++) {
-    j += a[i];
-  }
-  if (j == 77) 
-     printf("hello there\n");
-
-then Valgrind will complain, at the if, that the -condition depends on uninitialised values. - -

Most low level operations, such as adds, cause Valgrind to -use the V bits for the operands to calculate the V bits for the -result. Even if the result is partially or wholly undefined, -it does not complain. - -

Checks on definedness only occur in two places: when a value is -used to generate a memory address, and where a control flow decision -needs to be made. Also, when a system call is detected, valgrind -checks definedness of parameters as required. - -

If a check should detect undefinedness, an error message is -issued. The resulting value is subsequently regarded as well-defined. -To do otherwise would give long chains of error messages. In effect, -we say that undefined values are non-infectious. - -

This sounds overcomplicated. Why not just check all reads from -memory, and complain if an undefined value is loaded into a CPU register? -Well, that doesn't work well, because perfectly legitimate C programs routinely -copy uninitialised values around in memory, and we don't want endless complaints -about that. Here's the canonical example. Consider a struct -like this: -

-  struct S { int x; char c; };
-  struct S s1, s2;
-  s1.x = 42;
-  s1.c = 'z';
-  s2 = s1;
-
- -

The question to ask is: how large is struct S, in -bytes? An int is 4 bytes and a char one byte, so perhaps a struct S -occupies 5 bytes? Wrong. All (non-toy) compilers I know of will -round the size of struct S up to a whole number of words, -in this case 8 bytes. Not doing this forces compilers to generate -truly appalling code for subscripting arrays of struct -S's. - -

So s1 occupies 8 bytes, yet only 5 of them will be initialised. -For the assignment s2 = s1, gcc generates code to copy -all 8 bytes wholesale into s2 without regard for their -meaning. If Valgrind simply checked values as they came out of -memory, it would yelp every time a structure assignment like this -happened. So the more complicated semantics described above is -necessary. This allows gcc to copy s1 into -s2 any way it likes, and a warning will only be emitted -if the uninitialised values are later used. - -

One final twist to this story. The above scheme allows garbage to -pass through the CPU's integer registers without complaint. It does -this by giving the integer registers V tags, passing these around in -the expected way. This is complicated and computationally expensive to -do, but is necessary. Valgrind is more simplistic about -floating-point loads and stores. In particular, V bits for data read -as a result of floating-point loads are checked at the load -instruction. So if your program uses the floating-point registers to -do memory-to-memory copies, you will get complaints about -uninitialised values. Fortunately, I have not yet encountered a -program which (ab)uses the floating-point registers in this way. - - -

3.2  Valid-address (A) bits

- -Notice that the previous section describes how the validity of values -is established and maintained without having to say whether the -program does or does not have the right to access any particular -memory location. We now consider the latter issue. - -

As described above, every bit in memory or in the CPU has an -associated valid-value (V) bit. In addition, all bytes in memory, but -not in the CPU, have an associated valid-address (A) bit. This -indicates whether or not the program can legitimately read or write -that location. It does not give any indication of the validity of the -data at that location -- that's the job of the V bits -- only whether -or not the location may be accessed. - -

Every time your program reads or writes memory, Valgrind checks the -A bits associated with the address. If any of them indicate an -invalid address, an error is emitted. Note that the reads and writes -themselves do not change the A bits, only consult them. - -

So how do the A bits get set/cleared? Like this: - -

    -
  • When the program starts, all the global data areas are marked as - accessible.

  • -

    - -

  • When the program does malloc/new, the A bits for exactly the - area allocated, and not a byte more, are marked as accessible. - Upon freeing the area the A bits are changed to indicate - inaccessibility.

  • -

    - -

  • When the stack pointer register (%esp) moves up or down, A bits - are set. The rule is that the area from %esp up to the base of - the stack is marked as accessible, and below %esp is - inaccessible. (If that sounds illogical, bear in mind that the - stack grows down, not up, on almost all Unix systems, including - GNU/Linux.) Tracking %esp like this has the useful side-effect - that the section of stack used by a function for local variables - etc is automatically marked accessible on function entry and - inaccessible on exit.

  • -

    - -

  • When doing system calls, A bits are changed appropriately. For - example, mmap() magically makes files appear in the process's - address space, so the A bits must be updated if mmap() - succeeds.

  • -

    - -

  • Optionally, your program can tell Valgrind about such changes - explicitly, using the client request mechanism described above. -
- - - -

3.3  Putting it all together

-Valgrind's checking machinery can be summarised as follows: - -
    -
  • Each byte in memory has 8 associated V (valid-value) bits, - saying whether or not the byte has a defined value, and a single - A (valid-address) bit, saying whether or not the program - currently has the right to read/write that address.

  • -

    - -

  • When memory is read or written, the relevant A bits are - consulted. If they indicate an invalid address, Valgrind emits - an Invalid read or Invalid write error.

  • -

    - -

  • When memory is read into the CPU's integer registers, the - relevant V bits are fetched from memory and stored in the - simulated CPU. They are not consulted.

  • -

    - -

  • When an integer register is written out to memory, the V bits - for that register are written back to memory too.

  • -

    - -

  • When memory is read into the CPU's floating point registers, the - relevant V bits are read from memory and they are immediately - checked. If any are invalid, an uninitialised value error is - emitted. This precludes using the floating-point registers to - copy possibly-uninitialised memory, but simplifies Valgrind in - that it does not have to track the validity status of the - floating-point registers.

  • -

    - -

  • As a result, when a floating-point register is written to - memory, the associated V bits are set to indicate a valid - value.

  • -

    - -

  • When values in integer CPU registers are used to generate a - memory address, or to determine the outcome of a conditional - branch, the V bits for those values are checked, and an error - emitted if any of them are undefined.

  • -

    - -

  • When values in integer CPU registers are used for any other - purpose, Valgrind computes the V bits for the result, but does - not check them.

  • -

    - -

  • Once the V bits for a value in the CPU have been checked, they - are then set to indicate validity. This avoids long chains of - errors.

  • -

    - -

  • When values are loaded from memory, valgrind checks the A bits - for that location and issues an illegal-address warning if - needed. In that case, the V bits loaded are forced to indicate - Valid, despite the location being invalid. -

    - This apparently strange choice reduces the amount of confusing - information presented to the user. It avoids the - unpleasant phenomenon in which memory is read from a place which - is both unaddressible and contains invalid values, and, as a - result, you get not only an invalid-address (read/write) error, - but also a potentially large set of uninitialised-value errors, - one for every time the value is used. -

    - There is a hazy boundary case to do with multi-byte loads from - addresses which are partially valid and partially invalid. See - the description of the flag --partial-loads-ok for details. -


  • -
- -Valgrind intercepts calls to malloc, calloc, realloc, valloc, -memalign, free, new and delete. The behaviour you get is: - -
    - -
  • malloc/new: the returned memory is marked as addressible but not - having valid values. This means you have to write on it before - you can read it.

  • -

    - -

  • calloc: returned memory is marked both addressible and valid, - since calloc() clears the area to zero.

  • -

    - -

  • realloc: if the new size is larger than the old, the new section - is addressible but invalid, as with malloc.

  • -

    - -

  • If the new size is smaller, the dropped-off section is marked as - unaddressible. You may only pass to realloc a pointer - previously issued to you by malloc/calloc/new/realloc.

  • -

    - -

  • free/delete: you may only pass to free a pointer previously - issued to you by malloc/calloc/new/realloc, or the value - NULL. Otherwise, Valgrind complains. If the pointer is indeed - valid, Valgrind marks the entire area it points at as - unaddressible, and places the block in the freed-blocks-queue. - The aim is to defer as long as possible reallocation of this - block. Until that happens, all attempts to access it will - elicit an invalid-address error, as you would hope.

  • -
- - - - -

3.4  Signals

- -Valgrind provides suitable handling of signals, so, provided you stick -to POSIX stuff, you should be ok. Basic sigaction() and sigprocmask() -are handled. Signal handlers may return in the normal way or do -longjmp(); both should work ok. As specified by POSIX, a signal is -blocked in its own handler. Default actions for signals should work -as before. Etc, etc. - -

Under the hood, dealing with signals is a real pain, and Valgrind's -simulation leaves much to be desired. If your program does -way-strange stuff with signals, bad things may happen. If so, let me -know. I don't promise to fix it, but I'd at least like to be aware of -it. - - - -

3.5  Memory leak detection

- -Valgrind keeps track of all memory blocks issued in response to calls -to malloc/calloc/realloc/new. So when the program exits, it knows -which blocks are still outstanding -- have not been returned, in other -words. Ideally, you want your program to have no blocks still in use -at exit. But many programs do. - -

For each such block, Valgrind scans the entire address space of the -process, looking for pointers to the block. One of three situations -may result: - -

    -
  • A pointer to the start of the block is found. This usually - indicates programming sloppiness; since the block is still - pointed at, the programmer could, at least in principle, have free'd - it before program exit.

  • -

    - -

  • A pointer to the interior of the block is found. The pointer - might originally have pointed to the start and have been moved - along, or it might be entirely unrelated. Valgrind deems such a - block as "dubious", that is, possibly leaked, - because it's unclear whether or - not a pointer to it still exists.

  • -

    - -

  • The worst outcome is that no pointer to the block can be found. - The block is classified as "leaked", because the - programmer could not possibly have free'd it at program exit, - since no pointer to it exists. This might be a symptom of - having lost the pointer at some earlier point in the - program.
  • -
- -Valgrind reports summaries about leaked and dubious blocks. -For each such block, it will also tell you where the block was -allocated. This should help you figure out why the pointer to it has -been lost. In general, you should attempt to ensure your programs do -not have any leaked or dubious blocks at exit. - -

The precise area of memory in which Valgrind searches for pointers -is: all naturally-aligned 4-byte words for which all A bits indicate -addressibility and all V bits indicate that the stored value is -actually valid. - -


- - - -

4  Limitations

- -The following list of limitations seems depressingly long. However, -most programs actually work fine. - -

Valgrind will run x86-GNU/Linux ELF dynamically linked binaries, on -a kernel 2.2.X or 2.4.X system, subject to the following constraints: - -

    -
  • No MMX, SSE, SSE2, 3DNow instructions. If the translator - encounters these, Valgrind will simply give up. It may be - possible to add support for them at a later time. Intel added a - few instructions such as "cmov" to the integer instruction set - on Pentium and later processors, and these are supported. - Nevertheless it's safest to think of Valgrind as implementing - the 486 instruction set.

  • -

    - -

  • Pthreads support is improving, but there are still significant - limitations in that department. See the section above on - Pthreads. Note that your program must be dynamically linked - against libpthread.so, so that Valgrind can - substitute its own implementation at program startup time. If - you're statically linked against it, things will fail - badly.

  • -

    - -

  • Valgrind assumes that the floating point registers are not used - as intermediaries in memory-to-memory copies, so it immediately - checks V bits in floating-point loads/stores. If you want to - write code which copies around possibly-uninitialised values, - you must ensure these travel through the integer registers, not - the FPU.

  • -

    - -

  • If your program does its own memory management, rather than - using malloc/new/free/delete, it should still work, but - Valgrind's error checking won't be so effective.

  • -

    - -

  • Valgrind's signal simulation is not as robust as it could be. - Basic POSIX-compliant sigaction and sigprocmask functionality is - supplied, but it's conceivable that things could go badly awry - if you do weird things with signals. Workaround: don't. - Programs that do non-POSIX signal tricks are in any case - inherently unportable, so should be avoided if - possible.

  • -

    - -

  • Programs which switch stacks are not well handled. Valgrind - does have support for this, but I don't have great faith in it. - It's difficult -- there's no cast-iron way to decide whether a - large change in %esp is as a result of the program switching - stacks, or merely allocating a large object temporarily on the - current stack -- yet Valgrind needs to handle the two situations - differently. 1 May 02: this probably interacts badly with the - new pthread support. I haven't checked properly.

  • -

    - -

  • x86 instructions, and system calls, have been implemented on - demand. So it's possible, although unlikely, that a program - will fall over with a message to that effect. If this happens, - please mail me ALL the details printed out, so I can try and - implement the missing feature.

  • -

    - -

  • x86 floating point works correctly, but floating-point code may - run even more slowly than integer code, due to my simplistic - approach to FPU emulation.

  • -

    - -

  • You can't Valgrind-ize statically linked binaries. Valgrind - relies on the dynamic-link mechanism to gain control at - startup.

  • -

    - -

  • Memory consumption of your program is majorly increased whilst - running under Valgrind. This is due to the large amount of - administrative information maintained behind the scenes. Another - cause is that Valgrind dynamically translates the original - executable. Translated, instrumented code is 14-16 times larger - than the original (!) so you can easily end up with 30+ MB of - translations when running (eg) a web browser. -
  • -
- -Programs which are known not to work are: - -
    -
  • emacs starts up but immediately concludes it is out of memory - and aborts. Emacs has its own memory-management scheme, but I - don't understand why this should interact so badly with - Valgrind. Emacs works fine if you build it to use the standard - malloc/free routines.

  • -

    -

- -Known platform-specific limitations, as of release 1.0.0: - -
    -
  • On Red Hat 7.3, there have been reports of link errors (at - program start time) for threaded programs using - __pthread_clock_gettime and - __pthread_clock_settime. This appears to be due to - /lib/librt-2.2.5.so needing them. Unfortunately I - do not understand enough about this problem to fix it properly, - and I can't reproduce it on my test RedHat 7.3 system. Please - mail me if you have more information / understanding.

  • -

    -

  • - 1.0.0 now partially works on Red Hat 7.3.92 ("Limbo" - public beta). However, don't expect a smooth ride. - Basically valgrind won't work as-is with any - glibc-2.3 based system. Limbo is just a little pre glibc-2.3 - and it just about works. Limbo is also gcc-3.1 based and so - suffers from the problems in the following point.

  • -

    -

  • - Inlining of string functions with gcc-3.1 or above causes a - large number of false reports of uninitialised value uses. I - know what the problem is and roughly how to fix it, but I need - to devise a reasonably efficient fix. Try to reduce the - optimisation level, or use -fno-builtin-strlen in - the meantime. Or use an earlier gcc.

  • -

    -

- - -


- - - -

5  How it works -- a rough overview

-Some gory details, for those with a passion for gory details. You -don't need to read this section if all you want to do is use Valgrind. - - -

5.1  Getting started

- -Valgrind is compiled into a shared object, valgrind.so. The shell -script valgrind sets the LD_PRELOAD environment variable to point to -valgrind.so. This causes the .so to be loaded as an extra library to -any subsequently executed dynamically-linked ELF binary, viz, the -program you want to debug. - -

The dynamic linker allows each .so in the process image to have an -initialisation function which is run before main(). It also allows -each .so to have a finalisation function run after main() exits. - -

When valgrind.so's initialisation function is called by the dynamic -linker, the synthetic CPU starts up. The real CPU remains locked -in valgrind.so for the entire rest of the program, but the synthetic -CPU returns from the initialisation function. Startup of the program -now continues as usual -- the dynamic linker calls all the other .so's -initialisation routines, and eventually runs main(). This all runs on -the synthetic CPU, not the real one, but the client program cannot -tell the difference. - -

Eventually main() exits, so the synthetic CPU calls valgrind.so's -finalisation function. Valgrind detects this, and uses it as its cue -to exit. It prints summaries of all errors detected, possibly checks -for memory leaks, and then exits the finalisation routine, but now on -the real CPU. The synthetic CPU has now lost control -- permanently --- so the program exits back to the OS on the real CPU, just as it -would have done anyway. - -

On entry, Valgrind switches stacks, so it runs on its own stack. -On exit, it switches back. This means that the client program -continues to run on its own stack, so we can switch back and forth -between running it on the simulated and real CPUs without difficulty. -This was an important design decision, because it makes it easy (well, -significantly less difficult) to debug the synthetic CPU. - - - -

5.2  The translation/instrumentation engine

- -Valgrind does not directly run any of the original program's code. Only -instrumented translations are run. Valgrind maintains a translation -table, which allows it to find the translation quickly for any branch -target (code address). If no translation has yet been made, the -translator - a just-in-time translator - is summoned. This makes an -instrumented translation, which is added to the collection of -translations. Subsequent jumps to that address will use this -translation. - -

Valgrind no longer directly supports detection of self-modifying -code. Such checking is expensive, and in practice (fortunately) -almost no applications need it. However, to help people who are -debugging dynamic code generation systems, there is a Client Request -(basically a macro you can put in your program) which directs Valgrind -to discard translations in a given address range. So Valgrind can -still work in this situation provided the client tells it when -code has become out-of-date and needs to be retranslated. - -

The JITter translates basic blocks -- blocks of straight-line-code --- as single entities. To minimise the considerable difficulties of -dealing with the x86 instruction set, x86 instructions are first -translated to a RISC-like intermediate code, similar to sparc code, -but with an infinite number of virtual integer registers. Initially -each insn is translated separately, and there is no attempt at -instrumentation. - -

The intermediate code is improved, mostly so as to try and cache -the simulated machine's registers in the real machine's registers over -several simulated instructions. This is often very effective. Also, -we try to remove redundant updates of the simulated machine's -condition-code register. - -

The intermediate code is then instrumented, giving more -intermediate code. There are a few extra intermediate-code operations -to support instrumentation; it is all refreshingly simple. After -instrumentation there is a cleanup pass to remove redundant value -checks. - -

This gives instrumented intermediate code which mentions arbitrary -numbers of virtual registers. A linear-scan register allocator is -used to assign real registers and possibly generate spill code. All -of this is still phrased in terms of the intermediate code. This -machinery is inspired by the work of Reuben Thomas (MITE). - -

Then, and only then, is the final x86 code emitted. The -intermediate code is carefully designed so that x86 code can be -generated from it without need for spare registers or other -inconveniences. - -

The translations are managed using a traditional LRU-based caching -scheme. The translation cache has a default size of about 14MB. - - - -

5.3  Tracking the status of memory

Each byte in the -process' address space has nine bits associated with it: one A bit and -eight V bits. The A and V bits for each byte are stored using a -sparse array, which flexibly and efficiently covers arbitrary parts of -the 32-bit address space without imposing significant space or -performance overheads for the parts of the address space never -visited. The scheme used, and speedup hacks, are described in detail -at the top of the source file vg_memory.c, so you should read that for -the gory details. - - - -

5.4 System calls

-All system calls are intercepted. The memory status map is consulted -before and updated after each call. It's all rather tiresome. See -vg_syscall_mem.c for details. - - - -

5.5  Signals

-All system calls to sigaction() and sigprocmask() are intercepted. If -the client program is trying to set a signal handler, Valgrind makes a -note of the handler address and which signal it is for. Valgrind then -arranges for the same signal to be delivered to its own handler. - -

When such a signal arrives, Valgrind's own handler catches it, and -notes the fact. At a convenient safe point in execution, Valgrind -builds a signal delivery frame on the client's stack and runs its -handler. If the handler longjmp()s, there is nothing more to be said. -If the handler returns, Valgrind notices this, zaps the delivery -frame, and carries on where it left off before delivering the signal. - -

The purpose of this nonsense is that setting signal handlers -essentially amounts to giving callback addresses to the Linux kernel. -We can't allow this to happen, because if it did, signal handlers -would run on the real CPU, not the simulated one. This means the -checking machinery would not operate during the handler run, and, -worse, memory permissions maps would not be updated, which could cause -spurious error reports once the handler had returned. - -

An even worse thing would happen if the signal handler longjmp'd -rather than returned: Valgrind would completely lose control of the -client program. - -

Upshot: we can't allow the client to install signal handlers -directly. Instead, Valgrind must catch, on behalf of the client, any -signal the client asks to catch, and must deliver it to the client on -the simulated CPU, not the real one. This involves considerable -gruesome fakery; see vg_signals.c for details. -

- -


- - -

6  Example

-This is the log for a run of a small program. The program is in fact -correct, and the reported error is the result of a potentially serious -code generation bug in GNU g++ (snapshot 20010527). -
-sewardj@phoenix:~/newmat10$
-~/Valgrind-6/valgrind -v ./bogon 
-==25832== Valgrind 0.10, a memory error detector for x86 RedHat 7.1.
-==25832== Copyright (C) 2000-2001, and GNU GPL'd, by Julian Seward.
-==25832== Startup, with flags:
-==25832== --suppressions=/home/sewardj/Valgrind/redhat71.supp
-==25832== reading syms from /lib/ld-linux.so.2
-==25832== reading syms from /lib/libc.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libgcc_s.so.0
-==25832== reading syms from /lib/libm.so.6
-==25832== reading syms from /mnt/pima/jrs/Inst/lib/libstdc++.so.3
-==25832== reading syms from /home/sewardj/Valgrind/valgrind.so
-==25832== reading syms from /proc/self/exe
-==25832== loaded 5950 symbols, 142333 line number locations
-==25832== 
-==25832== Invalid read of size 4
-==25832==    at 0x8048724: _ZN10BandMatrix6ReSizeEiii (bogon.cpp:45)
-==25832==    by 0x80487AF: main (bogon.cpp:66)
-==25832==    by 0x40371E5E: __libc_start_main (libc-start.c:129)
-==25832==    by 0x80485D1: (within /home/sewardj/newmat10/bogon)
-==25832==    Address 0xBFFFF74C is not stack'd, malloc'd or free'd
-==25832==
-==25832== ERROR SUMMARY: 1 errors from 1 contexts (suppressed: 0 from 0)
-==25832== malloc/free: in use at exit: 0 bytes in 0 blocks.
-==25832== malloc/free: 0 allocs, 0 frees, 0 bytes allocated.
-==25832== For a detailed leak analysis, rerun with: --leak-check=yes
-==25832==
-==25832== exiting, did 1881 basic blocks, 0 misses.
-==25832== 223 translations, 3626 bytes in, 56801 bytes out.
-
-

The GCC folks fixed this about a week before gcc-3.0 shipped. -


-

- - - - -

7  Cache profiling

-As well as memory debugging, Valgrind also allows you to do cache simulations -and annotate your source line-by-line with the number of cache misses. In -particular, it records: -
    -
  • L1 instruction cache reads and misses; -
  • L1 data cache reads and read misses, writes and write misses; -
  • L2 unified cache reads and read misses, writes and write misses. -
-On a modern x86 machine, an L1 miss will typically cost around 10 cycles, -and an L2 miss can cost as much as 200 cycles. Detailed cache profiling can be -very useful for improving the performance of your program.

- -Also, since one instruction cache read is performed per instruction executed, -you can find out how many instructions are executed per line, which can be -useful for traditional profiling and test coverage.

- -Any feedback, bug-fixes, suggestions, etc, welcome. - - -

7.1  Overview

-First off, as for normal Valgrind use, you probably want to turn on debugging -info (the -g flag). But by contrast with normal Valgrind use, you -probably do want to turn optimisation on, since you should profile your -program as it will be normally run. - -The two steps are: -
    -
  1. Run your program with cachegrind in front of the - normal command line invocation. When the program finishes, - Valgrind will print summary cache statistics. It also collects - line-by-line information in a file cachegrind.out. -

    - This step should be done every time you want to collect - information about a new program, a changed program, or about the - same program with different input. -

  2. -

    -

  3. Generate a function-by-function summary, and possibly annotate - source files with 'vg_annotate'. Source files to annotate can be - specified manually on the command line, or - "interesting" source files can be annotated automatically with - the --auto=yes option. You can annotate C/C++ - files or assembly language files equally easily. -

    - This step can be performed as many times as you like for each - Step 1. You may want to do multiple annotations showing - different information each time.

    -

  4. -
- -The steps are described in detail in the following sections.

- - -

7.2  Cache simulation specifics

- -Cachegrind uses a simulation for a machine with a split L1 cache and a unified -L2 cache. This configuration is used for all (modern) x86-based machines we -are aware of. Old Cyrix CPUs had a unified I and D L1 cache, but they are -ancient history now.

- -The more specific characteristics of the simulation are as follows. - -

    -
  • Write-allocate: when a write miss occurs, the block written to - is brought into the D1 cache. Most modern caches have this - property.
  • - -

  • Bit-selection hash function: the line(s) in the cache to which a - memory block maps is chosen by the middle bits M--(M+N-1) of the - byte address, where: -
      -
    •  line size = 2^M bytes 
    • -
    • (cache size / line size) = 2^N bytes
    • -
  • - -

  • Inclusive L2 cache: the L2 cache replicates all the entries of - the L1 cache. This is standard on Pentium chips, but AMD - Athlons use an exclusive L2 cache that only holds blocks evicted - from L1. Ditto AMD Durons and most modern VIAs.
  • -

- -The cache configuration simulated (cache size, associativity and line size) is -determined automagically using the CPUID instruction. If you have an old -machine that (a) doesn't support the CPUID instruction, or (b) supports it in -an early incarnation that doesn't give any cache information, then Cachegrind -will fall back to using a default configuration (that of a model 3/4 Athlon). -Cachegrind will tell you if this happens. You can manually specify one, two or -all three levels (I1/D1/L2) of the cache from the command line using the ---I1, --D1 and --L2 options.

- -Other noteworthy behaviour: - -

    -
  • References that straddle two cache lines are treated as follows: -
      -
    • If both blocks hit --> counted as one hit
    • -
    • If one block hits, the other misses --> counted as one miss
    • -
    • If both blocks miss --> counted as one miss (not two)
    • -

  • - -
  • Instructions that modify a memory location (eg. inc and - dec) are counted as doing just a read, ie. a single data - reference. This may seem strange, but since the write can never cause a - miss (the read guarantees the block is in the cache) it's not very - interesting.

    - - Thus it measures not the number of times the data cache is accessed, but - the number of times a data cache miss could occur.

    -

  • -
- -If you are interested in simulating a cache with different properties, it is -not particularly hard to write your own cache simulator, or to modify the -existing ones in vg_cachesim_I1.c, vg_cachesim_D1.c, -vg_cachesim_L2.c and vg_cachesim_gen.c. We'd be -interested to hear from anyone who does. - - -

7.3  Profiling programs

- -Cache profiling is enabled by using the --cachesim=yes -option to the valgrind shell script. Alternatively, it -is probably more convenient to use the cachegrind script. -Either way automatically turns off Valgrind's memory checking functions, -since the cache simulation is slow enough already, and you probably -don't want to do both at once. -

-To gather cache profiling information about the program ls --l, type: - -

cachegrind ls -l
- -The program will execute (slowly). Upon completion, summary statistics -that look like this will be printed: - -
-==31751== I   refs:      27,742,716
-==31751== I1  misses:           276
-==31751== L2  misses:           275
-==31751== I1  miss rate:        0.0%
-==31751== L2i miss rate:        0.0%
-==31751== 
-==31751== D   refs:      15,430,290  (10,955,517 rd + 4,474,773 wr)
-==31751== D1  misses:        41,185  (    21,905 rd +    19,280 wr)
-==31751== L2  misses:        23,085  (     3,987 rd +    19,098 wr)
-==31751== D1  miss rate:        0.2% (       0.1%   +       0.4%)
-==31751== L2d miss rate:        0.1% (       0.0%   +       0.4%)
-==31751== 
-==31751== L2 misses:         23,360  (     4,262 rd +    19,098 wr)
-==31751== L2 miss rate:         0.0% (       0.0%   +       0.4%)
-
- -Cache accesses for instruction fetches are summarised first, giving the -number of fetches made (this is the number of instructions executed, which -can be useful to know in its own right), the number of I1 misses, and the -number of L2 instruction (L2i) misses.

- -Cache accesses for data follow. The information is similar to that of the -instruction fetches, except that the values are also shown split between reads -and writes (note each row's rd and wr values add up -to the row's total).

- -Combined instruction and data figures for the L2 cache follow that.

- - -

7.4  Output file

- -As well as printing summary information, Cachegrind also writes -line-by-line cache profiling information to a file named -cachegrind.out. This file is human-readable, but is best -interpreted by the accompanying program vg_annotate, -described in the next section. -

-Things to note about the cachegrind.out file: -

    -
  • It is written every time valgrind --cachesim=yes or - cachegrind is run, and will overwrite any existing - cachegrind.out in the current directory.
  • -

    -

  • It can be huge: ls -l generates a file of about - 350KB. Browsing a few files and web pages with a Konqueror - built with full debugging information generates a file - of around 15 MB.
  • -
- - -

7.5  Cachegrind options

-Cachegrind accepts all the options that Valgrind does, although some of them -(ones related to memory checking) don't do anything when cache profiling.

- -The interesting cache-simulation specific options are: - -

    -
  • --I1=<size>,<associativity>,<line_size>
    - --D1=<size>,<associativity>,<line_size>
    - --L2=<size>,<associativity>,<line_size>

    - [default: uses CPUID for automagic cache configuration]

    - - Manually specifies the I1/D1/L2 cache configuration, where - size and line_size are measured in bytes. The - three items must be comma-separated, but with no spaces, eg: - -

    cachegrind --I1=65536,2,64
    - - You can specify one, two or three of the I1/D1/L2 caches. Any level not - manually specified will be simulated using the configuration found in the - normal way (via the CPUID instruction, or failing that, via defaults). -
- - - -

7.6  Annotating C/C++ programs

- -Before using vg_annotate, it is worth widening your -window to be at least 120-characters wide if possible, as the output -lines can be quite long. -

-To get a function-by-function summary, run vg_annotate in a -directory containing a cachegrind.out file. The output -looks like this: - -

---------------------------------------------------------------------------------
-I1 cache:              65536 B, 64 B, 2-way associative
-D1 cache:              65536 B, 64 B, 2-way associative
-L2 cache:              262144 B, 64 B, 8-way associative
-Command:               concord vg_to_ucode.c
-Events recorded:       Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Events shown:          Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Event sort order:      Ir I1mr I2mr Dr D1mr D2mr Dw D1mw D2mw
-Threshold:             99%
-Chosen for annotation:
-Auto-annotation:       on
-
---------------------------------------------------------------------------------
-Ir         I1mr I2mr Dr         D1mr   D2mr  Dw        D1mw   D2mw
---------------------------------------------------------------------------------
-27,742,716  276  275 10,955,517 21,905 3,987 4,474,773 19,280 19,098  PROGRAM TOTALS
-
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr        D1mr  D2mr  Dw        D1mw   D2mw    file:function
---------------------------------------------------------------------------------
-8,821,482    5    5 2,242,702 1,621    73 1,794,230      0      0  getc.c:_IO_getc
-5,222,023    4    4 2,276,334    16    12   875,959      1      1  concord.c:get_word
-2,649,248    2    2 1,344,810 7,326 1,385         .      .      .  vg_main.c:strcmp
-2,521,927    2    2   591,215     0     0   179,398      0      0  concord.c:hash
-2,242,740    2    2 1,046,612   568    22   448,548      0      0  ctype.c:tolower
-1,496,937    4    4   630,874 9,000 1,400   279,388      0      0  concord.c:insert
-  897,991   51   51   897,831    95    30        62      1      1  ???:???
-  598,068    1    1   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__flockfile
-  598,068    0    0   299,034     0     0   149,517      0      0  ../sysdeps/generic/lockfile.c:__funlockfile
-  598,024    4    4   213,580    35    16   149,506      0      0  vg_clientmalloc.c:malloc
-  446,587    1    1   215,973 2,167   430   129,948 14,057 13,957  concord.c:add_existing
-  341,760    2    2   128,160     0     0   128,160      0      0  vg_clientmalloc.c:vg_trap_here_WRAPPER
-  320,782    4    4   150,711   276     0    56,027     53     53  concord.c:init_hash_table
-  298,998    1    1   106,785     0     0    64,071      1      1  concord.c:create
-  149,518    0    0   149,516     0     0         1      0      0  ???:tolower@@GLIBC_2.0
-  149,518    0    0   149,516     0     0         1      0      0  ???:fgetc@@GLIBC_2.0
-   95,983    4    4    38,031     0     0    34,409  3,152  3,150  concord.c:new_word_node
-   85,440    0    0    42,720     0     0    21,360      0      0  vg_clientmalloc.c:vg_bogus_epilogue
-
- -First up is a summary of the annotation options: - -
    -
  • I1 cache, D1 cache, L2 cache: cache configuration. So you know the - configuration with which these results were obtained.
  • - -

  • Command: the command line invocation of the program under - examination.
  • - -

  • Events recorded: event abbreviations are:

    -

      -
    • Ir : I cache reads (ie. instructions executed)
    • -
    • I1mr: I1 cache read misses
    • -
    • I2mr: L2 cache instruction read misses
    • -
    • Dr : D cache reads (ie. memory reads)
    • -
    • D1mr: D1 cache read misses
    • -
    • D2mr: L2 cache data read misses
    • -
    • Dw : D cache writes (ie. memory writes)
    • -
    • D1mw: D1 cache write misses
    • -
    • D2mw: L2 cache data write misses
    • -

    - Note that the D1 total miss count is given by D1mr + - D1mw, and that the L2 total miss count is given by - I2mr + D2mr + D2mw.

  • - -

  • Events shown: the events shown (a subset of events gathered). This can - be adjusted with the --show option.
  • - -

  • Event sort order: the sort order in which functions are shown. For - example, in this case the functions are sorted from highest - Ir counts to lowest. If two functions have identical - Ir counts, they will then be sorted by I1mr - counts, and so on. This order can be adjusted with the - --sort option.

    - - Note that this dictates the order the functions appear. It is not - the order in which the columns appear; that is dictated by the "events - shown" line (and can be changed with the --show option). -

  • - -

  • Threshold: vg_annotate by default omits functions - that cause very low numbers of misses to avoid drowning you in - information. In this case, vg_annotate shows summaries for the - functions that account for 99% of the Ir counts; - Ir is chosen as the threshold event since it is the - primary sort event. The threshold can be adjusted with the - --threshold option.
  • - -

  • Chosen for annotation: names of files specified manually for annotation; - in this case none.
  • - -

  • Auto-annotation: whether auto-annotation was requested via the - --auto=yes option. In this case yes.
  • -

- -Then follow summary statistics for the whole program. These are similar -to the summary provided when running cachegrind.

- -Then follow function-by-function statistics. Each function is -identified by a file_name:function_name pair. If a column -contains only a dot it means the function never performs -that event (eg. the third row shows that strcmp() -contains no instructions that write to memory). The name -??? is used if the file name and/or function name -could not be determined from debugging information. If most of the -entries have the form ???:??? the program probably wasn't -compiled with -g. If any code was invalidated (either due to -self-modifying code or unloading of shared objects) its counts are aggregated -into a single cost centre written as (discarded):(discarded).

- -It is worth noting that functions will come from three types of source files: -

    -
  1. From the profiled program (concord.c in this example).
  2. -
  3. From libraries (eg. getc.c)
  4. -
  5. From Valgrind's implementation of some libc functions (eg. - vg_clientmalloc.c:malloc). These are recognisable because - the filename begins with vg_, and is probably one of - vg_main.c, vg_clientmalloc.c or - vg_mylibc.c. -
  6. -
- -There are two ways to annotate source files -- by choosing them -manually, or with the --auto=yes option. To do it -manually, just specify the filenames as arguments to -vg_annotate. For example, the output from running -vg_annotate concord.c for our example produces the same -output as above followed by an annotated version of -concord.c, a section of which looks like: - -
---------------------------------------------------------------------------------
--- User-annotated source: concord.c
---------------------------------------------------------------------------------
-Ir        I1mr I2mr Dr      D1mr  D2mr  Dw      D1mw   D2mw
-
-[snip]
-
-        .    .    .       .     .     .       .      .      .  void init_hash_table(char *file_name, Word_Node *table[])
-        3    1    1       .     .     .       1      0      0  {
-        .    .    .       .     .     .       .      .      .      FILE *file_ptr;
-        .    .    .       .     .     .       .      .      .      Word_Info *data;
-        1    0    0       .     .     .       1      1      1      int line = 1, i;
-        .    .    .       .     .     .       .      .      .
-        5    0    0       .     .     .       3      0      0      data = (Word_Info *) create(sizeof(Word_Info));
-        .    .    .       .     .     .       .      .      .
-    4,991    0    0   1,995     0     0     998      0      0      for (i = 0; i < TABLE_SIZE; i++)
-    3,988    1    1   1,994     0     0     997     53     52          table[i] = NULL;
-        .    .    .       .     .     .       .      .      .
-        .    .    .       .     .     .       .      .      .      /* Open file, check it. */
-        6    0    0       1     0     0       4      0      0      file_ptr = fopen(file_name, "r");
-        2    0    0       1     0     0       .      .      .      if (!(file_ptr)) {
-        .    .    .       .     .     .       .      .      .          fprintf(stderr, "Couldn't open '%s'.\n", file_name);
-        1    1    1       .     .     .       .      .      .          exit(EXIT_FAILURE);
-        .    .    .       .     .     .       .      .      .      }
-        .    .    .       .     .     .       .      .      .
-  165,062    1    1  73,360     0     0  91,700      0      0      while ((line = get_word(data, line, file_ptr)) != EOF)
-  146,712    0    0  73,356     0     0  73,356      0      0          insert(data->word, data->line, table);
-        .    .    .       .     .     .       .      .      .
-        4    0    0       1     0     0       2      0      0      free(data);
-        4    0    0       1     0     0       2      0      0      fclose(file_ptr);
-        3    0    0       2     0     0       .      .      .  }
-
- -(Although column widths are automatically minimised, a wide terminal is clearly -useful.)

- -Each source file is clearly marked (User-annotated source) as -having been chosen manually for annotation. If the file was found in one of -the directories specified with the -I/--include -option, the directory and file are both given.

- -Each line is annotated with its event counts. Events not applicable for a line -are represented by a `.'; this is useful for distinguishing between an event -which cannot happen, and one which can but did not.

- -Sometimes only a small section of a source file is executed. To minimise -uninteresting output, Valgrind only shows annotated lines and lines within a -small distance of annotated lines. Gaps are marked with the line numbers so -you know which part of a file the shown code comes from, eg: - -

-(figures and code for line 704)
--- line 704 ----------------------------------------
--- line 878 ----------------------------------------
-(figures and code for line 878)
-
- -The amount of context to show around annotated lines is controlled by the ---context option.

- -To get automatic annotation, run vg_annotate --auto=yes. -vg_annotate will automatically annotate every source file it can find that is -mentioned in the function-by-function summary. Therefore, the files chosen for -auto-annotation are affected by the --sort and ---threshold options. Each source file is clearly marked -(Auto-annotated source) as being chosen automatically. Any files -that could not be found are mentioned at the end of the output, eg: - -

---------------------------------------------------------------------------------
-The following files chosen for auto-annotation could not be found:
---------------------------------------------------------------------------------
-  getc.c
-  ctype.c
-  ../sysdeps/generic/lockfile.c
-
- -This is quite common for library files, since libraries are usually compiled -with debugging information, but the source files are often not present on a -system. If a file is chosen for annotation both manually and -automatically, it is marked as User-annotated source. - -Use the -I/--include option to tell Valgrind where to look for -source files if the filenames found from the debugging information aren't -specific enough. - -Beware that vg_annotate can take some time to digest large -cachegrind.out files, eg. 30 seconds or more. Also beware that -auto-annotation can produce a lot of output if your program is large! - - -

7.7  Annotating assembler programs

- -Valgrind can annotate assembler programs too, or annotate the -assembler generated for your C program. Sometimes this is useful for -understanding what is really happening when an interesting line of C -code is translated into multiple instructions.

- -To do this, you just need to assemble your .s files with -assembler-level debug information. gcc doesn't do this, but you can -use the GNU assembler with the --gstabs option to -generate object files with this information, eg: - -

as --gstabs foo.s
- -You can then profile and annotate source files in the same way as for C/C++ -programs. - - -

7.8  vg_annotate options

-
    -
  • -h, --help
  • -

  • -v, --version

    - - Help and version, as usual.

  • - -
  • --sort=A,B,C [default: order in - cachegrind.out]

    - Specifies the events upon which the sorting of the function-by-function - entries will be based. Useful if you want to concentrate on eg. I cache - misses (--sort=I1mr,I2mr), or D cache misses - (--sort=D1mr,D2mr), or L2 misses - (--sort=D2mr,I2mr).

  • - -

  • --show=A,B,C [default: all, using order in - cachegrind.out]

    - Specifies which events to show (and the column order). Default is to use - all present in the cachegrind.out file (and use the order in - the file).

  • - -

  • --threshold=X [default: 99%]

    - Sets the threshold for the function-by-function summary. Functions are - shown that account for more than X% of the primary sort event. If - auto-annotating, also affects which files are annotated. - - Note: thresholds can be set for more than one of the events by appending - any events for the --sort option with a colon and a number - (no spaces, though). E.g. if you want to see the functions that cover - 99% of L2 read misses and 99% of L2 write misses, use this option: - -

    --sort=D2mr:99,D2mw:99
    -
  • - -

  • --auto=no [default]
    - --auto=yes

    - When enabled, automatically annotates every file that is mentioned in the - function-by-function summary that can be found. Also gives a list of - those that couldn't be found. - -

  • --context=N [default: 8]

    - Print N lines of context before and after each annotated line. Avoids - printing large sections of source files that were not executed. Use a - large number (eg. 10,000) to show all source lines. -

  • - -

  • -I=<dir>, --include=<dir> - [default: empty string]

    - Adds a directory to the list in which to search for files. Multiple - -I/--include options can be given to add multiple directories. -

- - -

7.9  Warnings

-There are a couple of situations in which vg_annotate issues warnings. - -
    -
  • If a source file is more recent than the cachegrind.out - file. This is because the information in cachegrind.out is - only recorded with line numbers, so if the line numbers change at all in - the source (eg. lines added, deleted, swapped), any annotations will be - incorrect.

    - -

  • If information is recorded about line numbers past the end of a file. - This can be caused by the above problem, ie. shortening the source file - while using an old cachegrind.out file. If this happens, - the figures for the bogus lines are printed anyway (clearly marked as - bogus) in case they are important.
  • -

- - -

7.10  Things to watch out for

-Some odd things that can occur during annotation: - -
    -
  • If annotating at the assembler level, you might see something like this: - -
    -      1    0    0  .    .    .  .    .    .          leal -12(%ebp),%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,84(%ebx)
    -      2    0    0  0    0    0  1    0    0          movl $1,-20(%ebp)
    -      .    .    .  .    .    .  .    .    .          .align 4,0x90
    -      1    0    0  .    .    .  .    .    .          movl $.LnrB,%eax
    -      1    0    0  .    .    .  1    0    0          movl %eax,-16(%ebp)
    -      
    - - How can the third instruction be executed twice when the others are - executed only once? As it turns out, it isn't. Here's a dump of the - executable, using objdump -d: - -
    -      8048f25:       8d 45 f4                lea    0xfffffff4(%ebp),%eax
    -      8048f28:       89 43 54                mov    %eax,0x54(%ebx)
    -      8048f2b:       c7 45 ec 01 00 00 00    movl   $0x1,0xffffffec(%ebp)
    -      8048f32:       89 f6                   mov    %esi,%esi
    -      8048f34:       b8 08 8b 07 08          mov    $0x8078b08,%eax
    -      8048f39:       89 45 f0                mov    %eax,0xfffffff0(%ebp)
    -      
    - - Notice the extra mov %esi,%esi instruction. Where did this - come from? The GNU assembler inserted it to serve as the two bytes of - padding needed to align the movl $.LnrB,%eax instruction on - a four-byte boundary, but pretended it didn't exist when adding debug - information. Thus when Valgrind reads the debug info it thinks that the - movl $0x1,0xffffffec(%ebp) instruction covers the address - range 0x8048f2b--0x8048f33 by itself, and attributes the counts for the - mov %esi,%esi to it.

    -

  • - -
  • Inlined functions can cause strange results in the function-by-function - summary. If a function inline_me() is defined in - foo.h and inlined in the functions f1(), - f2() and f3() in bar.c, there will - not be a foo.h:inline_me() function entry. Instead, there - will be separate function entries for each inlining site, ie. - foo.h:f1(), foo.h:f2() and - foo.h:f3(). To find the total counts for - foo.h:inline_me(), add up the counts from each entry.

    - - The reason for this is that although the debug info output by gcc - indicates the switch from bar.c to foo.h, it - doesn't indicate the name of the function in foo.h, so - Valgrind keeps using the old one.

    - -

  • Sometimes, the same filename might be represented with a relative name - and with an absolute name in different parts of the debug info, eg: - /home/user/proj/proj.h and ../proj.h. In this - case, if you use auto-annotation, the file will be annotated twice with - the counts split between the two.

    -

  • - -
  • Files with more than 65,535 lines cause difficulties for the stabs debug - info reader. This is because the line number in the struct - nlist defined in a.out.h under Linux is only a 16-bit - value. Valgrind can handle some files with more than 65,535 lines - correctly by making some guesses to identify line number overflows. But - some cases are beyond it, in which case you'll get a warning message - explaining that annotations for the file might be incorrect.

    -

  • - -
  • If you compile some files with -g and some without, some - events that take place in a file without debug info could be attributed - to the last line of a file with debug info (whichever one gets placed - before the non-debug-info file in the executable).

    -

  • -
- -This list looks long, but these cases should be fairly rare.

- -Note: stabs is not an easy format to read. If you come across bizarre -annotations that look like they might be caused by a bug in the stabs reader, -please let us know.

- - -

7.11  Accuracy

-Valgrind's cache profiling has a number of shortcomings: - -
    -
  • It doesn't account for kernel activity -- the effect of system calls on - the cache contents is ignored.
  • - -

  • It doesn't account for other process activity (although this is probably - desirable when considering a single program).
  • - -

  • It doesn't account for virtual-to-physical address mappings; hence the - entire simulation is not a true representation of what's happening in the - cache.
  • - -

  • It doesn't account for cache misses not visible at the instruction level, - eg. those arising from TLB misses, or speculative execution.
  • - -

  • Valgrind's custom malloc() will allocate memory in different - ways to the standard malloc(), which could warp the results. -
  • - -

  • Valgrind's custom threads implementation will schedule threads - differently to the standard one. This too could warp the results for - threaded programs. -
  • - -

  • The instructions bts, btr and btc - will incorrectly be counted as doing a data read if both the arguments - are registers, eg: - -
    btsl %eax, %edx
    - - This should only happen rarely. -
- -Another thing worth noting is that results are very sensitive. Changing the -size of the valgrind.so file, the size of the program being -profiled, or even the length of its name can perturb the results. Variations -will be small, but don't expect perfectly repeatable results if your program -changes at all.

- -While these factors mean you shouldn't trust the results to be super-accurate, -hopefully they should be close enough to be useful.

- - -

7.12  Todo

-
    -
  • Program start-up/shut-down calls a lot of functions that aren't - interesting and just complicate the output. Would be nice to exclude - these somehow.
  • -

    -

-
- - - diff --git a/memcheck/docs/nav.html b/memcheck/docs/nav.html deleted file mode 100644 index ad920ad443..0000000000 --- a/memcheck/docs/nav.html +++ /dev/null @@ -1,72 +0,0 @@ - - - Valgrind - - - - - -
- Contents of this manual
- 1 Introduction
- 1.1 What Valgrind is for
- 1.2 What it does with - your program -

- 2 How to use it, and how to - make sense of the results
- 2.1 Getting started
- 2.2 The commentary
- 2.3 Reporting of errors
- 2.4 Suppressing errors
- 2.5 Command-line flags
- 2.6 Explanation of error messages
- 2.7 Writing suppressions files
- 2.8 The Client Request mechanism
- 2.9 Support for POSIX pthreads
- 2.10 Building and installing
- 2.11 If you have problems -

- 3 Details of the checking machinery
- 3.1 Valid-value (V) bits
- 3.2 Valid-address (A) bits
- 3.3 Putting it all together
- 3.4 Signals
- 3.5 Memory leak detection -

- 4 Limitations
-

- 5 How it works -- a rough overview
- 5.1 Getting started
- 5.2 The translation/instrumentation engine
- 5.3 Tracking the status of memory
- 5.4 System calls
- 5.5 Signals -

- 6 An example
-

- 7 Cache profiling -

- 8 The design and implementation of Valgrind
- - - diff --git a/memcheck/docs/techdocs.html b/memcheck/docs/techdocs.html deleted file mode 100644 index 2e1cc8b7e9..0000000000 --- a/memcheck/docs/techdocs.html +++ /dev/null @@ -1,2524 +0,0 @@ - - - - The design and implementation of Valgrind - - - - -  -

The design and implementation of Valgrind

- -
-Detailed technical notes for hackers, maintainers and the -overly-curious
-These notes pertain to snapshot 20020306
-

-jseward@acm.org
-
http://developer.kde.org/~sewardj
-Copyright © 2000-2002 Julian Seward -

-Valgrind is licensed under the GNU General Public License, -version 2
-An open-source tool for finding memory-management problems in -x86 GNU/Linux executables. -

- -

- - - - -


- -

Introduction

- -This document contains a detailed, highly-technical description of the -internals of Valgrind. This is not the user manual; if you are an -end-user of Valgrind, you do not want to read this. Conversely, if -you really are a hacker-type and want to know how it works, I assume -that you have read the user manual thoroughly. -

-You may need to read this document several times, and carefully. Some -important things, I only say once. - - -

History

- -Valgrind came into public view in late Feb 2002. However, it has been -under contemplation for a very long time, perhaps seriously for about -five years. Somewhat over two years ago, I started working on the x86 -code generator for the Glasgow Haskell Compiler -(http://www.haskell.org/ghc), gaining familiarity with x86 internals -on the way. I then did Cacheprof (http://www.cacheprof.org), gaining -further x86 experience. Some time around Feb 2000 I started -experimenting with a user-space x86 interpreter for x86-Linux. This -worked, but it was clear that a JIT-based scheme would be necessary to -give reasonable performance for Valgrind. Design work for the JITter -started in earnest in Oct 2000, and by early 2001 I had an x86-to-x86 -dynamic translator which could run quite large programs. This -translator was in a sense pointless, since it did not do any -instrumentation or checking. - -

-Most of the rest of 2001 was taken up designing and implementing the -instrumentation scheme. The main difficulty, which consumed a lot -of effort, was to design a scheme which did not generate large numbers -of false uninitialised-value warnings. By late 2001 a satisfactory -scheme had been arrived at, and I started to test it on ever-larger -programs, with an eventual eye to making it work well enough so that -it was helpful to folks debugging the upcoming version 3 of KDE. I've -used KDE since before version 1.0, and wanted Valgrind to be an -indirect contribution to the KDE 3 development effort. At the start of -Feb 02 the kde-core-devel crew started using it, and gave a huge -amount of helpful feedback and patches in the space of three weeks. -Snapshot 20020306 is the result. - -

-In the best Unix tradition, or perhaps in the spirit of Fred Brooks' -depressing-but-completely-accurate epitaph "build one to throw away; -you will anyway", much of Valgrind is a second or third rendition of -the initial idea. The instrumentation machinery -(vg_translate.c, vg_memory.c) and core CPU -simulation (vg_to_ucode.c, vg_from_ucode.c) -have had three redesigns and rewrites; the register allocator, -low-level memory manager (vg_malloc2.c) and symbol table -reader (vg_symtab2.c) are on the second rewrite. In a -sense, this document serves to record some of the knowledge gained as -a result. - - -

Design overview

- -Valgrind is compiled into a Linux shared object, -valgrind.so, and also a dummy one, -valgrinq.so, of which more later. The -valgrind shell script adds valgrind.so to -the LD_PRELOAD list of extra libraries to be -loaded with any dynamically linked library. This is a standard trick, -one which I assume the LD_PRELOAD mechanism was developed -to support. - -

-valgrind.so -is linked with the -z initfirst flag, which requests that -its initialisation code is run before that of any other object in the -executable image. When this happens, valgrind gains control. The -real CPU becomes "trapped" in valgrind.so and the -translations it generates. The synthetic CPU provided by Valgrind -does, however, return from this initialisation function. So the -normal startup actions, orchestrated by the dynamic linker -ld.so, continue as usual, except on the synthetic CPU, -not the real one. Eventually main is run and returns, -and then the finalisation code of the shared objects is run, -presumably in inverse order to which they were initialised. Remember, -this is still all happening on the simulated CPU. Eventually -valgrind.so's own finalisation code is called. It spots -this event, shuts down the simulated CPU, prints any error summaries -and/or does leak detection, and returns from the initialisation code -on the real CPU. At this point, in effect the real and synthetic CPUs -have merged back into one, Valgrind has lost control of the program, -and the program finally exit()s back to the kernel in the -usual way. - -

-The normal course of activity, once Valgrind has started up, is as -follows. Valgrind never runs any part of your program (usually -referred to as the "client"), not a single byte of it, directly. -Instead it uses function VG_(translate) to translate -basic blocks (BBs, straight-line sequences of code) into instrumented -translations, and those are run instead. The translations are stored -in the translation cache (TC), vg_tc, with the -translation table (TT), vg_tt supplying the -original-to-translation code address mapping. Auxiliary array -VG_(tt_fast) is used as a direct-map cache for fast -lookups in TT; it usually achieves a hit rate of around 98% and -facilitates an orig-to-trans lookup in 4 x86 insns, which is not bad. - -

-Function VG_(dispatch) in vg_dispatch.S is -the heart of the JIT dispatcher. Once a translated code address has -been found, it is executed simply by an x86 call -to the translation. At the end of the translation, the next -original code addr is loaded into %eax, and the -translation then does a ret, taking it back to the -dispatch loop, with, interestingly, zero branch mispredictions. -The address requested in %eax is looked up first in -VG_(tt_fast), and, if not found, by calling C helper -VG_(search_transtab). If there is still no translation -available, VG_(dispatch) exits back to the top-level -C dispatcher VG_(toploop), which arranges for -VG_(translate) to make a new translation. All fairly -unsurprising, really. There are various complexities described below. - -

-The translator, orchestrated by VG_(translate), is -complicated but entirely self-contained. It is described in great -detail in subsequent sections. Translations are stored in TC, with TT -tracking administrative information. The translations are subject to -an approximate LRU-based management scheme. With the current -settings, the TC can hold at most about 15MB of translations, and LRU -passes prune it to about 13.5MB. Given that the -orig-to-translation expansion ratio is about 13:1 to 14:1, this means -TC holds translations for more or less a megabyte of original code, -which generally comes to about 70000 basic blocks for C++ compiled -with optimisation on. Generating new translations is expensive, so it -is worth having a large TC to minimise the (capacity) miss rate. - -

-The dispatcher, VG_(dispatch), receives hints from -the translations which allow it to cheaply spot all control -transfers corresponding to x86 call and ret -instructions. It has to do this in order to spot some special events: -

    -
  • Calls to VG_(shutdown). This is Valgrind's cue to - exit. NOTE: actually this is done a different way; it should be - cleaned up. -

    -

  • Returns of signal handlers, to the return address - VG_(signalreturn_bogusRA). The signal simulator - needs to know when a signal handler is returning, so we spot - jumps (returns) to this address. -

    -

  • Calls to vg_trap_here. All malloc, - free, etc calls that the client program makes are - eventually routed to a call to vg_trap_here, - and Valgrind does its own special thing with these calls. - In effect this provides a trapdoor, by which Valgrind can - intercept certain calls on the simulated CPU, run the call as it - sees fit itself (on the real CPU), and return the result to - the simulated CPU, quite transparently to the client program. -
-Valgrind intercepts the client's malloc, -free, etc, -calls, so that it can store additional information. Each block -malloc'd by the client gives rise to a shadow block -in which Valgrind stores the call stack at the time of the -malloc -call. When the client calls free, Valgrind tries to -find the shadow block corresponding to the address passed to -free, and emits an error message if none can be found. -If it is found, the block is placed on the freed blocks queue -vg_freed_list, it is marked as inaccessible, and -its shadow block now records the call stack at the time of the -free call. Keeping free'd blocks in -this queue allows Valgrind to spot all (presumably invalid) accesses -to them. However, once the volume of blocks in the free queue -exceeds VG_(clo_freelist_vol), blocks are finally -removed from the queue. - -

-Keeping track of A and V bits (note: if you don't know what these are, -you haven't read the user guide carefully enough) for memory is done -in vg_memory.c. This implements a sparse array structure -which covers the entire 4G address space in a way which is reasonably -fast and reasonably space efficient. The 4G address space is divided -up into 64K sections, each covering 64KB of address space. Given a -32-bit address, the top 16 bits are used to select one of the 65536 -entries in VG_(primary_map). The resulting "secondary" -(SecMap) holds A and V bits for the 64KB chunk of address -space corresponding to the lower 16 bits of the address. - - -

Design decisions

- -Some design decisions were motivated by the need to make Valgrind -debuggable. Imagine you are writing a CPU simulator. It works fairly -well. However, you run some large program, like Netscape, and after -tens of millions of instructions, it crashes. How can you figure out -where in your simulator the bug is? - -

-Valgrind's answer is: cheat. Valgrind is designed so that it is -possible to switch back to running the client program on the real -CPU at any point. Using the --stop-after= flag, you can -ask Valgrind to run just some number of basic blocks, and then -run the rest of the way on the real CPU. If you are searching for -a bug in the simulated CPU, you can use this to do a binary search, -which quickly leads you to the specific basic block which is -causing the problem. - -

-This is all very handy. It does constrain the design in certain -unimportant ways. Firstly, the layout of memory, when viewed from the -client's point of view, must be identical regardless of whether it is -running on the real or simulated CPU. This means that Valgrind can't -do pointer swizzling -- well, no great loss -- and it can't run on -the same stack as the client -- again, no great loss. -Valgrind operates on its own stack, VG_(stack), which -it switches to at startup, temporarily switching back to the client's -stack when doing system calls for the client. - -

-Valgrind also receives signals on its own stack, -VG_(sigstack), but for different gruesome reasons -discussed below. - -

-This nice clean switch-back-to-the-real-CPU-whenever-you-like story -is muddied by signals. Problem is that signals arrive at arbitrary -times and tend to slightly perturb the basic block count, with the -result that you can get close to the basic block causing a problem but -can't home in on it exactly. My kludgey hack is to define -SIGNAL_SIMULATION to 1 towards the bottom of -vg_syscall_mem.c, so that signal handlers are run on the -real CPU and don't change the BB counts. - -

-A second hole in the switch-back-to-real-CPU story is that Valgrind's -way of delivering signals to the client is different from that of the -kernel. Specifically, the layout of the signal delivery frame, and -the mechanism used to detect a sighandler returning, are different. -So you can't expect to make the transition inside a sighandler and -still have things working, but in practice that's not much of a -restriction. - -

-Valgrind's implementation of malloc, free, -etc, (in vg_clientmalloc.c, not the low-level stuff in -vg_malloc2.c) is somewhat complicated by the need to -handle switching back at arbitrary points. It does work tho. - - - -

Correctness

- -There's only one of me, and I have a Real Life (tm) as well as hacking -Valgrind [allegedly :-]. That means I don't have time to waste -chasing endless bugs in Valgrind. My emphasis is therefore on doing -everything as simply as possible, with correctness, stability and -robustness being the number one priority, more important than -performance or functionality. As a result: -
    -
  • The code is absolutely loaded with assertions, and these are - permanently enabled. I have no plan to remove or disable - them later. Over the past couple of months, as valgrind has - become more widely used, they have shown their worth, pulling - up various bugs which would otherwise have appeared as - hard-to-find segmentation faults. -

    - I am of the view that it's acceptable to spend 5% of the total - running time of your valgrindified program doing assertion checks - and other internal sanity checks. -

    -

  • Aside from the assertions, valgrind contains various sets of - internal sanity checks, which get run at varying frequencies - during normal operation. VG_(do_sanity_checks) - runs every 1000 basic blocks, which means 500 to 2000 times/second - for typical machines at present. It checks that Valgrind hasn't - overrun its private stack, and does some simple checks on the - memory permissions maps. Once every 25 calls it does some more - extensive checks on those maps. Etc, etc. -

    - The following components also have sanity check code, which can - be enabled to aid debugging: -

      -
    • The low-level memory-manager - (VG_(mallocSanityCheckArena)). This does a - complete check of all blocks and chains in an arena, which - is very slow. Is not engaged by default. -

      -

    • The symbol table reader(s): various checks to ensure - uniqueness of mappings; see VG_(read_symbols) - for a start. Is permanently engaged. -

      -

    • The A and V bit tracking stuff in vg_memory.c. - This can be compiled with cpp symbol - VG_DEBUG_MEMORY defined, which removes all the - fast, optimised cases, and uses simple-but-slow fallbacks - instead. Not engaged by default. -

      -

    • Ditto VG_DEBUG_LEAKCHECK. -

      -

    • The JITter parses x86 basic blocks into sequences of - UCode instructions. It then sanity checks each one with - VG_(saneUInstr) and sanity checks the sequence - as a whole with VG_(saneUCodeBlock). This stuff - is engaged by default, and has caught some way-obscure bugs - in the simulated CPU machinery in its time. -

      -

    • The system call wrapper does - VG_(first_and_last_secondaries_look_plausible) after - every syscall; this is known to pick up bugs in the syscall - wrappers. Engaged by default. -

      -

    • The main dispatch loop, in VG_(dispatch), checks - that translations do not set %ebp to any value - different from VG_EBP_DISPATCH_CHECKED or - & VG_(baseBlock). In effect this test is free, - and is permanently engaged. -

      -

    • There are a couple of ifdefed-out consistency checks I - inserted whilst debugging the new register allocator, - vg_do_register_allocation. -
    -

    -

  • I try to avoid techniques, algorithms, mechanisms, etc, for which - I can supply neither a convincing argument that they are correct, - nor sanity-check code which might pick up bugs in my - implementation. I don't always succeed in this, but I try. - Basically the idea is: avoid techniques which are, in practice, - unverifiable, in some sense. When doing anything, always have in - mind: "how can I verify that this is correct?" -
- -

-Some more specific things are: - -

    -
  • Valgrind runs in the same namespace as the client, at least from - ld.so's point of view, and it therefore absolutely - had better not export any symbol with a name which could clash - with that of the client or any of its libraries. Therefore, all - globally visible symbols exported from valgrind.so - are defined using the VG_ CPP macro. As you'll see - from vg_constants.h, this prepends some arbitrary - prefix to the symbol, in order that it be, we hope, globally - unique. Currently the prefix is vgPlain_. For - convenience there are also VGM_, VGP_ - and VGOFF_. All locally defined symbols are declared - static and do not appear in the final shared object. -

    - To check this, I periodically do - nm valgrind.so | grep " T ", - which shows you all the globally exported text symbols. - They should all have an approved prefix, except for those like - malloc, free, etc, which we deliberately - want to shadow and take precedence over the same names exported - from glibc.so, so that valgrind can intercept those - calls easily. Similarly, nm valgrind.so | grep " D " - allows you to find any rogue data-segment symbol names. -

    -

  • Valgrind tries, and almost succeeds, in being completely - independent of all other shared objects, in particular of - glibc.so. For example, we have our own low-level - memory manager in vg_malloc2.c, which is a fairly - standard malloc/free scheme augmented with arenas, and - vg_mylibc.c exports reimplementations of various bits - and pieces you'd normally get from the C library. -

    - Why all the hassle? Because imagine the potential chaos of both - the simulated and real CPUs executing in glibc.so. - It just seems simpler and cleaner to be completely self-contained, - so that only the simulated CPU visits glibc.so. In - practice it's not much hassle anyway. Also, valgrind starts up - before glibc has a chance to initialise itself, and who knows what - difficulties that could lead to. Finally, glibc has definitions - for some types, specifically sigset_t, which conflict - (are different from) the Linux kernel's idea of same. When - Valgrind wants to fiddle around with signal stuff, it wants to - use the kernel's definitions, not glibc's definitions. So it's - simplest just to keep glibc out of the picture entirely. -

    - To find out which glibc symbols are used by Valgrind, reinstate - the link flags -nostdlib -Wl,-no-undefined. This - causes linking to fail, but will tell you what you depend on. - I have mostly, but not entirely, got rid of the glibc - dependencies; what remains is, IMO, fairly harmless. AFAIK the - current dependencies are: memset, - memcmp, stat, system, - sbrk, setjmp and longjmp. - -

    -

  • Similarly, valgrind should not really import any headers other - than the Linux kernel headers, since it knows of no API other than - the kernel interface to talk to. At the moment this is really not - in a good state, and vg_syscall_mem imports, via - vg_unsafe.h, a significant number of C-library - headers so as to know the sizes of various structs passed across - the kernel boundary. This is of course completely bogus, since - there is no guarantee that the C library's definitions of these - structs match those of the kernel. I have started to sort this - out using vg_kerneliface.h, into which I had intended - to copy all kernel definitions which valgrind could need, but this - has not gotten very far. At the moment it mostly contains - definitions for sigset_t and struct - sigaction, since the kernel's definition for these really - does clash with glibc's. I plan to use a vki_ prefix - on all these types and constants, to denote the fact that they - pertain to Valgrind's Kernel Interface. -

    - Another advantage of having a vg_kerneliface.h file - is that it makes it simpler to interface to a different kernel. - One can, for example, easily imagine writing a new - vg_kerneliface.h for FreeBSD, or x86 NetBSD. - -

- -

Current limitations

- -No threads. I think fixing this is close to a research-grade problem. -

-No MMX. Fixing this should be relatively easy, using the same giant -trick used for x86 FPU instructions. See below. -

-Support for weird (non-POSIX) signal stuff is patchy. Does anybody -care? -

- - - - -


- -

The instrumenting JITter

- -This really is the heart of the matter. We begin with various side -issues. - -

Run-time storage, and the use of host registers

- -Valgrind translates client (original) basic blocks into instrumented -basic blocks, which live in the translation cache TC, until either the -client finishes or the translations are ejected from TC to make room -for newer ones. -

-Since it generates x86 code in memory, Valgrind has complete control -of the use of registers in the translations. Now pay attention. I -shall say this only once, and it is important you understand this. In -what follows I will refer to registers in the host (real) cpu using -their standard names, %eax, %edi, etc. I -refer to registers in the simulated CPU by capitalising them: -%EAX, %EDI, etc. These two sets of -registers usually bear no direct relationship to each other; there is -no fixed mapping between them. This naming scheme is used fairly -consistently in the comments in the sources. -

-Host registers, once things are up and running, are used as follows: -

    -
  • %esp, the real stack pointer, points - somewhere in Valgrind's private stack area, - VG_(stack) or, transiently, into its signal delivery - stack, VG_(sigstack). -

    -

  • %edi is used as a temporary in code generation; it - is almost always dead, except when used for the Left - value-tag operations. -

    -

  • %eax, %ebx, %ecx, - %edx and %esi are available to - Valgrind's register allocator. They are dead (carry unimportant - values) in between translations, and are live only in - translations. The one exception to this is %eax, - which, as mentioned far above, has a special significance to the - dispatch loop VG_(dispatch): when a translation - returns to the dispatch loop, %eax is expected to - contain the original-code-address of the next translation to run. - The register allocator is so good at minimising spill code that - using five regs and not having to save/restore %edi - actually gives better code than allocating to %edi - as well, but then having to push/pop it around special uses. -

    -

  • %ebp points permanently at - VG_(baseBlock). Valgrind's translations are - position-independent, partly because this is convenient, but also - because translations get moved around in TC as part of the LRUing - activity. All static entities which need to be referred to - from generated code, whether data or helper functions, are stored - starting at VG_(baseBlock) and are therefore reached - by indexing from %ebp. There is but one exception, - which is that by placing the value - VG_EBP_DISPATCH_CHECKED - in %ebp just before a return to the dispatcher, - the dispatcher is informed that the next address to run, - in %eax, requires special treatment. -

    -

  • The real machine's FPU state is pretty much unimportant, for - reasons which will become obvious. Ditto its %eflags - register. -
- -

-The state of the simulated CPU is stored in memory, in -VG_(baseBlock), which is a block of 200 words IIRC. -Recall that %ebp points permanently at the start of this -block. Function vg_init_baseBlock decides what the -offsets of various entities in VG_(baseBlock) are to be, -and allocates word offsets for them. The code generator then emits -%ebp relative addresses to get at those things. The -sequence in which entities are allocated has been carefully chosen so -that the 32 most popular entities come first, because this means 8-bit -offsets can be used in the generated code. - -

-If I was clever, I could make %ebp point 32 words along -VG_(baseBlock), so that I'd have another 32 words of -short-form offsets available, but that's just complicated, and it's -not important -- the first 32 words take 99% (or whatever) of the -traffic. - -

-Currently, the sequence of stuff in VG_(baseBlock) is as -follows: -

    -
  • 9 words, holding the simulated integer registers, - %EAX .. %EDI, and the simulated flags, - %EFLAGS. -

    -

  • Another 9 words, holding the V bit "shadows" for the above 9 regs. -

    -

  • The addresses of various helper routines called from - generated code: - VG_(helper_value_check4_fail), - VG_(helper_value_check0_fail), - which register V-check failures, - VG_(helperc_STOREV4), - VG_(helperc_STOREV1), - VG_(helperc_LOADV4), - VG_(helperc_LOADV1), - which do stores and loads of V bits to/from the - sparse array which keeps track of V bits in memory, - and - VGM_(handle_esp_assignment), which messes with - memory addressibility resulting from changes in %ESP. -

    -

  • The simulated %EIP. -

    -

  • 24 spill words, for when the register allocator can't make it work - with 5 measly registers. -

    -

  • Addresses of helpers VG_(helperc_STOREV2), - VG_(helperc_LOADV2). These are here because 2-byte - loads and stores are relatively rare, so are placed above the - magic 32-word offset boundary. -

    -

  • For similar reasons, addresses of helper functions - VGM_(fpu_write_check) and - VGM_(fpu_read_check), which handle the A/V maps - testing and changes required by FPU writes/reads. -

    -

  • Some other boring helper addresses: - VG_(helper_value_check2_fail) and - VG_(helper_value_check1_fail). These are probably - never emitted now, and should be removed. -

    -

  • The entire state of the simulated FPU, which I believe to be - 108 bytes long. -

    -

  • Finally, the addresses of various other helper functions in - vg_helpers.S, which deal with rare situations which - are tedious or difficult to generate code in-line for. -
- -

-As a general rule, the simulated machine's state lives permanently in -memory at VG_(baseBlock). However, the JITter does some -optimisations which allow the simulated integer registers to be -cached in real registers over multiple simulated instructions within -the same basic block. These are always flushed back into memory at -the end of every basic block, so that the in-memory state is -up-to-date between basic blocks. (This flushing is implied by the -statement above that the real machine's allocatable registers are -dead in between simulated blocks). - - -

Startup, shutdown, and system calls

- -Getting into Valgrind (VG_(startup), called from -valgrind.so's initialisation section), really means -copying the real CPU's state into VG_(baseBlock), and -then installing our own stack pointer, etc, into the real CPU, and -then starting up the JITter. Exiting valgrind involves copying the -simulated state back to the real state. - -

-Unfortunately, there's a complication at startup time. Problem is -that at the point where we need to take a snapshot of the real CPU's -state, the offsets in VG_(baseBlock) are not set up yet, -because to do so would involve disrupting the real machine's state -significantly. The way round this is to dump the real machine's state -into a temporary, static block of memory, -VG_(m_state_static). We can then set up the -VG_(baseBlock) offsets at our leisure, and copy into it -from VG_(m_state_static) at some convenient later time. -This copying is done by -VG_(copy_m_state_static_to_baseBlock). - -

-On exit, the inverse transformation is (rather unnecessarily) used: -stuff in VG_(baseBlock) is copied to -VG_(m_state_static), and the assembly stub then copies -from VG_(m_state_static) into the real machine registers. - -

-Doing system calls on behalf of the client (vg_syscall.S) -is something of a half-way house. We have to make the world look -sufficiently like that which the client would normally have to make -the syscall actually work properly, but we can't afford to lose -control. So the trick is to copy all of the client's state, except -its program counter, into the real CPU, do the system call, and -copy the state back out. Note that the client's state includes its -stack pointer register, so one effect of this partial restoration is -to cause the system call to be run on the client's stack, as it should -be. - -

-As ever there are complications. We have to save some of our own state -somewhere when restoring the client's state into the CPU, so that we -can keep going sensibly afterwards. In fact the only thing which is -important is our own stack pointer, but for paranoia reasons I save -and restore our own FPU state as well, even though that's probably -pointless. - -

-The complication on the above complication is, that for horrible -reasons to do with signals, we may have to handle a second client -system call whilst the client is blocked inside some other system -call (unbelievable!). That means there's two sets of places to -dump Valgrind's stack pointer and FPU state across the syscall, -and we decide which to use by consulting -VG_(syscall_depth), which is in turn maintained by -VG_(wrap_syscall). - - - -

Introduction to UCode

- -UCode lies at the heart of the x86-to-x86 JITter. The basic premise -is that dealing with the x86 instruction set head-on is just too darn -complicated, so we do the traditional compiler-writer's trick and -translate it into a simpler, easier-to-deal-with form. - -

-In normal operation, translation proceeds through six stages, -coordinated by VG_(translate): -

    -
  1. Parsing of an x86 basic block into a sequence of UCode - instructions (VG_(disBB)). -

    -

  2. UCode optimisation (vg_improve), with the aim of - caching simulated registers in real registers over multiple - simulated instructions, and removing redundant simulated - %EFLAGS saving/restoring. -

    -

  3. UCode instrumentation (vg_instrument), which adds - value and address checking code. -

    -

  4. Post-instrumentation cleanup (vg_cleanup), removing - redundant value-check computations. -

    -

  5. Register allocation (vg_do_register_allocation), - which, note, is done on UCode. -

    -

  6. Emission of final instrumented x86 code - (VG_(emit_code)). -
- -

-Notice how steps 2, 3, 4 and 5 are simple UCode-to-UCode -transformation passes, all on straight-line blocks of UCode (type -UCodeBlock). Steps 2 and 4 are optimisation passes and -can be disabled for debugging purposes, with ---optimise=no and --cleanup=no respectively. - -

-Valgrind can also run in a no-instrumentation mode, given ---instrument=no. This is useful for debugging the JITter -quickly without having to deal with the complexity of the -instrumentation mechanism too. In this mode, steps 3 and 4 are -omitted. - -

-These flags combine, so that --instrument=no together with ---optimise=no means only steps 1, 5 and 6 are used. ---single-step=yes causes each x86 instruction to be -treated as a single basic block. The translations are terrible but -this is sometimes instructive. - -

-The --stop-after=N flag switches back to the real CPU -after N basic blocks. It also re-JITs the final basic -block executed and prints the debugging info resulting, so this -gives you a way to get a quick snapshot of how a basic block looks as -it passes through the six stages mentioned above. If you want to -see full information for every block translated (probably not, but -still ...) find, in VG_(translate), the lines -
dis = True; -
dis = debugging_translation; -
-and comment out the second line. This will spew out debugging -junk faster than you can possibly imagine. - - - -

UCode operand tags: type Tag

- -UCode is, more or less, a simple two-address RISC-like code. In -keeping with the x86 AT&T assembly syntax, generally speaking the -first operand is the source operand, and the second is the destination -operand, which is modified when the uinstr is notionally executed. - -

-UCode instructions have up to three operand fields, each of which has -a corresponding Tag describing it. Possible values for -the tag are: - -

    -
  • NoValue: indicates that the field is not in use. -

    -

  • Lit16: the field contains a 16-bit literal. -

    -

  • Literal: the field denotes a 32-bit literal, whose - value is stored in the lit32 field of the uinstr - itself. Since there is only one lit32 for the whole - uinstr, only one operand field may contain this tag. -

    -

  • SpillNo: the field contains a spill slot number, in - the range 0 to 23 inclusive, denoting one of the spill slots - contained inside VG_(baseBlock). Such tags only - exist after register allocation. -

    -

  • RealReg: the field contains a number in the range 0 - to 7 denoting an integer x86 ("real") register on the host. The - number is the Intel encoding for integer registers. Such tags - only exist after register allocation. -

    -

  • ArchReg: the field contains a number in the range 0 - to 7 denoting an integer x86 register on the simulated CPU. In - reality this means a reference to one of the first 8 words of - VG_(baseBlock). Such tags can exist at any point in - the translation process. -

    -

  • Last, but not least, TempReg. The field contains the - number of one of an infinite set of virtual (integer) - registers. TempRegs are used everywhere throughout - the translation process; you can have as many as you want. The - register allocator maps as many as it can into - RealRegs and turns the rest into - SpillNos, so TempRegs should not exist - after the register allocation phase. -

    - TempRegs are always 32 bits long, even if the data - they hold is logically shorter. In that case the upper unused - bits are required, and, I think, generally assumed, to be zero. - TempRegs holding V bits for quantities shorter than - 32 bits are expected to have ones in the unused places, since a - one denotes "undefined". -

- - -

UCode instructions: type UInstr

- -

-UCode was carefully designed to make it possible to do register -allocation on UCode and then translate the result into x86 code -without needing any extra registers ... well, that was the original -plan, anyway. Things have gotten a little more complicated since -then. In what follows, UCode instructions are referred to as uinstrs, -to distinguish them from x86 instructions. Uinstrs of course have -uopcodes which are (naturally) different from x86 opcodes. - -

-A uinstr (type UInstr) contains -various fields, not all of which are used by any one uopcode: -

    -
  • Three 16-bit operand fields, val1, val2 - and val3. -

    -

  • Three tag fields, tag1, tag2 - and tag3. Each of these has a value of type - Tag, - and they describe what the val1, val2 - and val3 fields contain. -

    -

  • A 32-bit literal field. -

    -

  • Two FlagSets, specifying which x86 condition codes are - read and written by the uinstr. -

    -

  • An opcode byte, containing a value of type Opcode. -

    -

  • A size field, indicating the data transfer size (1/2/4/8/10) in - cases where this makes sense, or zero otherwise. -

    -

  • A condition-code field, which, for jumps, holds a - value of type Condcode, indicating the condition - which applies. The encoding is as it is in the x86 insn stream, - except we add a 17th value CondAlways to indicate - an unconditional transfer. -

    -

  • Various 1-bit flags, indicating whether this insn pertains to an - x86 CALL or RET instruction, whether a widening is signed or not, - etc. -
- -

-UOpcodes (type Opcode) are divided into two groups: those -necessary merely to express the functionality of the x86 code, and -extra uopcodes needed to express the instrumentation. The former -group contains: -

    -
  • GET and PUT, which move values from the - simulated CPU's integer registers (ArchRegs) into - TempRegs, and back. GETF and - PUTF do the corresponding thing for the simulated - %EFLAGS. There are no corresponding insns for the - FPU register stack, since we don't explicitly simulate its - registers. -

    -

  • LOAD and STORE, which, in RISC-like - fashion, are the only uinstrs able to interact with memory. -

    -

  • MOV and CMOV allow unconditional and - conditional moves of values between TempRegs. -

    -

  • ALU operations. Again in RISC-like fashion, these only operate on - TempRegs (before reg-alloc) or RealRegs - (after reg-alloc). These are: ADD, ADC, - AND, OR, XOR, - SUB, SBB, SHL, - SHR, SAR, ROL, - ROR, RCL, RCR, - NOT, NEG, INC, - DEC, BSWAP, CC2VAL and - WIDEN. WIDEN does signed or unsigned - value widening. CC2VAL is used to convert condition - codes into a value, zero or one. The rest are obvious. -

    - To allow for more efficient code generation, we bend slightly the - restriction at the start of the previous para: for - ADD, ADC, XOR, - SUB and SBB, we allow the first (source) - operand to also be an ArchReg, that is, one of the - simulated machine's registers. Also, many of these ALU ops allow - the source operand to be a literal. See - VG_(saneUInstr) for the final word on the allowable - forms of uinstrs. -

    -

  • LEA1 and LEA2 are not strictly - necessary, but facilitate better translations. They - record the fancy x86 addressing modes in a direct way, which - allows those amodes to be emitted back into the final - instruction stream more or less verbatim. -

    -

  • CALLM calls a machine-code helper, one of the methods - whose address is stored at some VG_(baseBlock) - offset. PUSH and POP move values - to/from TempReg to the real (Valgrind's) stack, and - CLEAR removes values from the stack. - CALLM_S and CALLM_E delimit the - boundaries of call setups and clearings, for the benefit of the - instrumentation passes. Getting this right is critical, and so - VG_(saneUCodeBlock) makes various checks on the use - of these uopcodes. -

    - It is important to understand that these uopcodes have nothing to - do with the x86 call, return, - push or pop instructions, and are not - used to implement them. Those guys turn into combinations of - GET, PUT, LOAD, - STORE, ADD, SUB, and - JMP. What these uopcodes support is calling of - helper functions such as VG_(helper_imul_32_64), - which do stuff which is too difficult or tedious to emit inline. -

    -

  • FPU, FPU_R and FPU_W. - Valgrind doesn't attempt to simulate the internal state of the - FPU at all. Consequently it only needs to be able to distinguish - FPU ops which read and write memory from those that don't, and - for those which do, it needs to know the effective address and - data transfer size. This is made easier because the x86 FP - instruction encoding is very regular, basically consisting of - 16 bits for a non-memory FPU insn and 11 (IIRC) bits + an address mode - for a memory FPU insn. So our FPU uinstr carries - the 16 bits in its val1 field. And - FPU_R and FPU_W carry 11 bits in that - field, together with the identity of a TempReg or - (later) RealReg which contains the address. -

    -

  • JIFZ is unique, in that it allows a control-flow - transfer which is not deemed to end a basic block. It causes a - jump to a literal (original) address if the specified argument - is zero. -

    -

  • Finally, INCEIP advances the simulated - %EIP by the specified literal amount. This supports - lazy %EIP updating, as described below. -
- -

-Stages 1 and 2 of the 6-stage translation process mentioned above -deal purely with these uopcodes, and no others. They are -sufficient to express pretty much all the x86 32-bit protected-mode -instruction set, at -least everything understood by a pre-MMX original Pentium (P54C). - -

-Stages 3, 4, 5 and 6 also deal with the following extra -"instrumentation" uopcodes. They are used to express all the -definedness-tracking and -checking machinery which valgrind does. In -later sections we show how to create checking code for each of the -uopcodes above. Note that these instrumentation uopcodes, although -some appearing complicated, have been carefully chosen so that -efficient x86 code can be generated for them. GNU superopt v2.5 did a -great job helping out here. Anyways, the uopcodes are as follows: - -

    -
  • GETV and PUTV are analogues to - GET and PUT above. They are identical - except that they move the V bits for the specified values back and - forth to TempRegs, rather than moving the values - themselves. -

    -

  • Similarly, LOADV and STOREV read and - write V bits from the synthesised shadow memory that Valgrind - maintains. In fact they do more than that, since they also do - address-validity checks, and emit complaints if the read/written - addresses are unaddressible. -

    -

  • TESTV, whose parameters are a TempReg - and a size, tests the V bits in the TempReg, at the - specified operation size (0/1/2/4 byte) and emits an error if any - of them indicate undefinedness. This is the only uopcode capable - of doing such tests. -

    -

  • SETV, whose parameters are also TempReg - and a size, makes the V bits in the TempReg indicate - definedness, at the specified operation size. This is usually - used to generate the correct V bits for a literal value, which is - of course fully defined. -

    -

  • GETVF and PUTVF are analogues to - GETF and PUTF. They move the single V - bit used to model definedness of %EFLAGS between its - home in VG_(baseBlock) and the specified - TempReg. -

    -

  • TAG1 denotes one of a family of unary operations on - TempRegs containing V bits. Similarly, - TAG2 denotes one in a family of binary operations on - V bits. -
- -

-These 10 uopcodes are sufficient to express Valgrind's entire -definedness-checking semantics. In fact most of the interesting magic -is done by the TAG1 and TAG2 -suboperations. - -

-First, however, I need to explain about V-vector operation sizes. -There are 4 sizes: 1, 2 and 4, which operate on groups of 8, 16 and 32 -V bits at a time, supporting the usual 1, 2 and 4 byte x86 operations. -However there is also the mysterious size 0, which really means a -single V bit. Single V bits are used in various circumstances; in -particular, the definedness of %EFLAGS is modelled with a -single V bit. Now might be a good time to also point out that for -V bits, 1 means "undefined" and 0 means "defined". Similarly, for A -bits, 1 means "invalid address" and 0 means "valid address". This -seems counterintuitive (and so it is), but testing against zero on -x86s saves instructions compared to testing against all 1s, because -many ALU operations set the Z flag for free, so to speak. - -

-With that in mind, the tag ops are: - -

    -
  • (UNARY) Pessimising casts: VgT_PCast40, - VgT_PCast20, VgT_PCast10, - VgT_PCast01, VgT_PCast02 and - VgT_PCast04. A "pessimising cast" takes a V-bit - vector at one size, and creates a new one at another size, - pessimised in the sense that if any of the bits in the source - vector indicate undefinedness, then all the bits in the result - indicate undefinedness. In this case the casts are all to or from - a single V bit, so for example VgT_PCast40 is a - pessimising cast from 32 bits to 1, whereas - VgT_PCast04 simply copies the single source V bit - into all 32 bit positions in the result. Surprisingly, these ops - can all be implemented very efficiently. -

    - There are also the pessimising casts VgT_PCast14, - from 8 bits to 32, VgT_PCast12, from 8 bits to 16, - and VgT_PCast11, from 8 bits to 8. This last one - seems nonsensical, but in fact it isn't a no-op because, as - mentioned above, any undefined (1) bits in the source infect the - entire result. -

    -

  • (UNARY) Propagating undefinedness upwards in a word: - VgT_Left4, VgT_Left2 and - VgT_Left1. These are used to simulate the worst-case - effects of carry propagation in adds and subtracts. They return a - V vector identical to the original, except that if the original - contained any undefined bits, then it and all bits above it are - marked as undefined too. Hence the Left bit in the names. -

    -

  • (UNARY) Signed and unsigned value widening: - VgT_SWiden14, VgT_SWiden24, - VgT_SWiden12, VgT_ZWiden14, - VgT_ZWiden24 and VgT_ZWiden12. These - mimic the definedness effects of standard signed and unsigned - integer widening. Unsigned widening creates zero bits in the new - positions, so VgT_ZWiden* accordingly mark - those parts of their argument as defined. Signed widening copies - the sign bit into the new positions, so VgT_SWiden* - copies the definedness of the sign bit into the new positions. - Because 1 means undefined and 0 means defined, these operations - can (fascinatingly) be done by the same operations which they - mimic. Go figure. -

    -

  • (BINARY) Undefined-if-either-Undefined, - Defined-if-either-Defined: VgT_UifU4, - VgT_UifU2, VgT_UifU1, - VgT_UifU0, VgT_DifD4, - VgT_DifD2, VgT_DifD1. These do simple - bitwise operations on pairs of V-bit vectors, with - UifU giving undefined if either arg bit is - undefined, and DifD giving defined if either arg bit - is defined. Abstract interpretation junkies, if any make it this - far, may like to think of them as meets and joins (or is it joins - and meets) in the definedness lattices. -

    -

  • (BINARY; one value, one V bits) Generate argument improvement - terms for AND and OR: VgT_ImproveAND4_TQ, - VgT_ImproveAND2_TQ, VgT_ImproveAND1_TQ, - VgT_ImproveOR4_TQ, VgT_ImproveOR2_TQ, - VgT_ImproveOR1_TQ. These help out with AND and OR - operations. AND and OR have the inconvenient property that the - definedness of the result depends on the actual values of the - arguments as well as their definedness. At the bit level: -
    1 AND undefined = undefined, but -
    0 AND undefined = 0, and similarly -
    0 OR undefined = undefined, but -
    1 OR undefined = 1. -
    -

    - It turns out that gcc (quite legitimately) generates code which - relies on this fact, so we have to model it properly in order to - avoid flooding users with spurious value errors. The ultimate - definedness result of AND and OR is calculated using - UifU on the definedness of the arguments, but we - also DifD in some "improvement" terms which - take into account the above phenomena. -

    - ImproveAND takes as its first argument the actual - value of an argument to AND (the T) and the definedness of that - argument (the Q), and returns a V-bit vector which is defined (0) - for bits which have value 0 and are defined; this, when - DifD into the final result causes those bits to be - defined even if the corresponding bit in the other argument is undefined. -

    - The ImproveOR ops do the dual thing for OR - arguments. Note that XOR does not have this property that one - argument can make the other irrelevant, so there is no need for - such complexity for XOR. -

- -

-That's all the tag ops. If you stare at this long enough, and then -run Valgrind and stare at the pre- and post-instrumented ucode, it -should be fairly obvious how the instrumentation machinery hangs -together. - -

-One point, if you do this: in order to make it easy to differentiate -TempRegs carrying values from TempRegs -carrying V bit vectors, Valgrind prints the former as (for example) -t28 and the latter as q28; the fact that -they carry the same number serves to indicate their relationship. -This is purely for the convenience of the human reader; the register -allocator and code generator don't regard them as different. - - -

Translation into UCode

- -VG_(disBB) allocates a new UCodeBlock and -then uses disInstr to translate x86 instructions one at a -time into UCode, dumping the result in the UCodeBlock. -This goes on until a control-flow transfer instruction is encountered. - -

- -Despite the large size of vg_to_ucode.c, this translation -is really very simple. Each x86 instruction is translated entirely -independently of its neighbours, merrily allocating new -TempRegs as it goes. The idea is to have a simple -translator -- in reality, no more than a macro-expander -- and the -resulting bad UCode translation is cleaned up by the UCode -optimisation phase which follows. To give you an idea of some x86 -instructions and their translations (this is a complete basic block, -as Valgrind sees it): -

-        0x40435A50:  incl %edx
-
-           0: GETL      %EDX, t0
-           1: INCL      t0  (-wOSZAP)
-           2: PUTL      t0, %EDX
-
-        0x40435A51:  movsbl (%edx),%eax
-
-           3: GETL      %EDX, t2
-           4: LDB       (t2), t2
-           5: WIDENL_Bs t2
-           6: PUTL      t2, %EAX
-
-        0x40435A54:  testb $0x20, 1(%ecx,%eax,2)
-
-           7: GETL      %EAX, t6
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t6,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-
-        0x40435A59:  jnz-8 0x40435A50
-
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

-Notice how the block always ends with an unconditional jump to the -next block. This is a bit unnecessary, but makes many things simpler. - -

-Most x86 instructions turn into sequences of GET, -PUT, LEA1, LEA2, -LOAD and STORE. Some complicated ones -however rely on calling helper bits of code in -vg_helpers.S. The ucode instructions PUSH, -POP, CALL, CALLM_S and -CALLM_E support this. The calling convention is somewhat -ad-hoc and is not the C calling convention. The helper routines must -save all integer registers, and the flags, that they use. Args are -passed on the stack underneath the return address, as usual, and if -result(s) are to be returned, it (they) are either placed in dummy arg -slots created by the ucode PUSH sequence, or just -overwrite the incoming args. - -

-In order that the instrumentation mechanism can handle calls to these -helpers, VG_(saneUCodeBlock) enforces the following -restrictions on calls to helpers: - -

    -
  • Each CALL uinstr must be bracketed by a preceding - CALLM_S marker (dummy uinstr) and a trailing - CALLM_E marker. These markers are used by the - instrumentation mechanism later to establish the boundaries of the - PUSH, POP and CLEAR - sequences for the call. -

    -

  • PUSH, POP and CLEAR - may only appear inside sections bracketed by CALLM_S - and CALLM_E, and nowhere else. -

    -

  • In any such bracketed section, no two PUSH insns may - push the same TempReg. Dually, no two - POPs may pop the same TempReg. -

    -

  • Finally, although this is not checked, args should be removed from - the stack with CLEAR, rather than POPs - into a TempReg which is not subsequently used. This - is because the instrumentation mechanism assumes that all values - POPped from the stack are actually used. -
- -Some of the translations may appear to have redundant -TempReg-to-TempReg moves. This helps the -next phase, UCode optimisation, to generate better code. - - - -

UCode optimisation

- -UCode is then subjected to an improvement pass -(vg_improve()), which blurs the boundaries between the -translations of the original x86 instructions. It's pretty -straightforward. Three transformations are done: - -
    -
  • Redundant GET elimination. Actually, more general - than that -- eliminates redundant fetches of ArchRegs. In our - running example, uinstr 3 GETs %EDX into - t2 despite the fact that, by looking at the previous - uinstr, it is already in t0. The GET is - therefore removed, and t2 renamed to t0. - Assuming t0 is allocated to a host register, it means - the simulated %EDX will exist in a host CPU register - for more than one simulated x86 instruction, which seems to me to - be a highly desirable property. -

    - There is some mucking around to do with subregisters; - %AL vs %AH, %AX vs - %EAX etc. I can't remember how it works, but in - general we are very conservative, and these tend to invalidate the - caching. -

    -

  • Redundant PUT elimination. This annuls - PUTs of values back to simulated CPU registers if a - later PUT would overwrite the earlier - PUT value, and there are no intervening reads of the - simulated register (ArchReg). -

    - As before, we are paranoid when faced with subregister references. - Also, PUTs of %ESP are never annulled, - because it is vital the instrumenter always has an up-to-date - %ESP value available, since %ESP changes - affect addressibility of the memory around the simulated stack - pointer. -

    - The implication of the above paragraph is that the simulated - machine's registers are only lazily updated once the above two - optimisation phases have run, with the exception of - %ESP. TempRegs go dead at the end of - every basic block, from which it is inferable that any - TempReg caching a simulated CPU reg is flushed (back - into the relevant VG_(baseBlock) slot) at the end of - every basic block. The further implication is that the simulated - registers are only up-to-date in between basic blocks, and not - at arbitrary points inside basic blocks. And the consequence of - that is that we can only deliver signals to the client in between - basic blocks. None of this seems any problem in practice. -

    -

  • Finally there is a simple def-use thing for condition codes. If - an earlier uinstr writes the condition codes, and the next uinsn - along which actually cares about the condition codes writes the - same or larger set of them, but does not read any, the earlier - uinsn is marked as not writing any condition codes. This saves - a lot of redundant cond-code saving and restoring. -
- -The effect of these transformations on our short block is rather -unexciting, and shown below. On longer basic blocks they can -dramatically improve code quality. - -
-at 3: delete GET, rename t2 to t0 in (4 .. 6)
-at 7: delete GET, rename t6 to t0 in (8 .. 9)
-at 1: annul flag write OSZAP due to later OSZACP
-
-Improved code:
-           0: GETL      %EDX, t0
-           1: INCL      t0
-           2: PUTL      t0, %EDX
-           4: LDB       (t0), t0
-           5: WIDENL_Bs t0
-           6: PUTL      t0, %EAX
-           8: GETL      %ECX, t8
-           9: LEA2L     1(t8,t0,2), t4
-          10: LDB       (t4), t10
-          11: MOVB      $0x20, t12
-          12: ANDB      t12, t10  (-wOSZACP)
-          13: INCEIPo   $9
-          14: Jnzo      $0x40435A50  (-rOSZACP)
-          15: JMPo      $0x40435A5B
-
- -

UCode instrumentation

- -Once you understand the meaning of the instrumentation uinstrs, -discussed in detail above, the instrumentation scheme is fairly -straightforward. Each uinstr is instrumented in isolation, and the -instrumentation uinstrs are placed before the original uinstr. -Our running example continues below. I have placed a blank line -after every original ucode, to make it easier to see which -instrumentation uinstrs correspond to which originals. - -

-As mentioned somewhere above, TempRegs carrying values -have names like t28, and each one has a shadow carrying -its V bits, with names like q28. This pairing aids in -reading instrumented ucode. - -

-One decision about all this is where to have "observation points", -that is, where to check that V bits are valid. I use a minimalistic -scheme, only checking where a failure of validity could cause the -original program to (seg)fault. So the use of values as memory -addresses causes a check, as do conditional jumps (these cause a check -on the definedness of the condition codes). And arguments -PUSHed for helper calls are checked, hence the weird -restrictions on helper call preambles described above. - -

-Another decision is that once a value is tested, it is thereafter -regarded as defined, so that we do not emit multiple undefined-value -errors for the same undefined value. That means that -TESTV uinstrs are always followed by SETV -on the same (shadow) TempRegs. Most of these -SETVs are redundant and are removed by the -post-instrumentation cleanup phase. - -

-The instrumentation for calling helper functions deserves further -comment. The definedness of results from a helper is modelled using -just one V bit. So, in short, we do pessimising casts of the -definedness of all the args, down to a single bit, and then -UifU these bits together. So this single V bit will say -"undefined" if any part of any arg is undefined. This V bit is then -pessimally cast back up to the result(s) sizes, as needed. If, by -seeing that all the args are got rid of with CLEAR and -none with POP, Valgrind sees that the result of the call -is not actually used, it immediately examines the result V bit with a -TESTV -- SETV pair. If it did not do this, -there would be no observation point to detect that some of the -args to the helper were undefined. Of course, if the helper's results -are indeed used, we don't do this, since the result usage will -presumably cause the result definedness to be checked at some suitable -future point. - -

-In general Valgrind tries to track definedness on a bit-for-bit basis, -but as the above para shows, for calls to helpers we throw in the -towel and approximate down to a single bit. This is because it's too -complex and difficult to track bit-level definedness through complex -ops such as integer multiply and divide, and in any case there are no -reasonable code fragments which attempt to (eg) multiply two -partially-defined values and end up with something meaningful, so -there seems little point in modelling multiplies, divides, etc, in -that level of detail. - -

-Integer loads and stores are instrumented with firstly a test of the -definedness of the address, followed by a LOADV or -STOREV respectively. These turn into calls to -(for example) VG_(helperc_LOADV4). These helpers do two -things: they perform an address-valid check, and they load or store V -bits from/to the relevant address in the (simulated V-bit) memory. - -

-FPU loads and stores are different. As above the definedness of the -address is first tested. However, the helper routine for FPU loads -(VGM_(fpu_read_check)) emits an error if either the -address is invalid or the referenced area contains undefined values. -It has to do this because we do not simulate the FPU at all, and so -cannot track definedness of values loaded into it from memory, so we -have to check them as soon as they are loaded into the FPU, ie, at -this point. We notionally assume that everything in the FPU is -defined. - -

-It follows therefore that FPU writes first check the definedness of -the address, then the validity of the address, and finally mark the -written bytes as well-defined. - -

-If anyone is inspired to extend Valgrind to MMX/SSE insns, I suggest -you use the same trick. It works provided that the FPU/MMX unit is -not used merely as a conduit to copy partially undefined data from -one place in memory to another. Unfortunately the integer CPU is used -like that (when copying C structs with holes, for example) and this is -the cause of much of the elaborateness of the instrumentation here -described. - -

-vg_instrument() in vg_translate.c actually -does the instrumentation. There are comments explaining how each -uinstr is handled, so we do not repeat that here. As explained -already, it is bit-accurate, except for calls to helper functions. -Unfortunately the x86 insns bt/bts/btc/btr are done by -helper fns, so bit-level accuracy is lost there. This should be fixed -by doing them inline; it will probably require adding a couple new -uinstrs. Also, left and right rotates through the carry flag (x86 -rcl and rcr) are approximated via a single -V bit; so far this has not caused anyone to complain. The -non-carry rotates, rol and ror, are much -more common and are done exactly. Re-visiting the instrumentation for -AND and OR, they seem rather verbose, and I wonder if it could be done -more concisely now. - -

-The lowercase o on many of the uopcodes in the running -example indicates that the size field is zero, usually meaning a -single-bit operation. - -

-Anyroads, the post-instrumented version of our running example looks -like this: - -

-Instrumented code:
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           7: SETVL     q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          22: SETVL     q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          25: SETVB     q12
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          29: TAG2o     q10 = UifU1 ( q12, q10 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          31: MOVL      q12, q14
-          32: TAG2o     q14 = ImproveAND1_TQ ( t12, q14 )
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-
-          39: GETVFo    q18
-          40: TESTVo    q18
-          41: SETVo     q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

UCode post-instrumentation cleanup

- -

-This pass, coordinated by vg_cleanup(), removes redundant -definedness computation created by the simplistic instrumentation -pass. It consists of two passes, -vg_propagate_definedness() followed by -vg_delete_redundant_SETVs. - -

-vg_propagate_definedness() is a simple -constant-propagation and constant-folding pass. It tries to determine -which TempRegs containing V bits will always indicate -"fully defined", and it propagates this information as far as it can, -and folds out as many operations as possible. For example, the -instrumentation for an ADD of a literal to a variable quantity will be -reduced down so that the definedness of the result is simply the -definedness of the variable quantity, since the literal is by -definition fully defined. - -

-vg_delete_redundant_SETVs removes SETVs on -shadow TempRegs for which the next action is a write. -I don't think there's anything else worth saying about this; it is -simple. Read the sources for details. - -

-So the cleaned-up running example looks like this. As above, I have -inserted line breaks after every original (non-instrumentation) uinstr -to aid readability. As with straightforward ucode optimisation, the -results in this block are undramatic because it is so short; longer -blocks benefit more because they have more redundancy which gets -eliminated. - - -

-at 29: delete UifU1 due to defd arg1
-at 32: change ImproveAND1_TQ to MOV due to defd arg2
-at 41: delete SETV
-at 31: delete MOV
-at 25: delete SETV
-at 22: delete SETV
-at 7: delete SETV
-
-           0: GETVL     %EDX, q0
-           1: GETL      %EDX, t0
-
-           2: TAG1o     q0 = Left4 ( q0 )
-           3: INCL      t0
-
-           4: PUTVL     q0, %EDX
-           5: PUTL      t0, %EDX
-
-           6: TESTVL    q0
-           8: LOADVB    (t0), q0
-           9: LDB       (t0), t0
-
-          10: TAG1o     q0 = SWiden14 ( q0 )
-          11: WIDENL_Bs t0
-
-          12: PUTVL     q0, %EAX
-          13: PUTL      t0, %EAX
-
-          14: GETVL     %ECX, q8
-          15: GETL      %ECX, t8
-
-          16: MOVL      q0, q4
-          17: SHLL      $0x1, q4
-          18: TAG2o     q4 = UifU4 ( q8, q4 )
-          19: TAG1o     q4 = Left4 ( q4 )
-          20: LEA2L     1(t8,t0,2), t4
-
-          21: TESTVL    q4
-          23: LOADVB    (t4), q10
-          24: LDB       (t4), t10
-
-          26: MOVB      $0x20, t12
-
-          27: MOVL      q10, q14
-          28: TAG2o     q14 = ImproveAND1_TQ ( t10, q14 )
-          30: TAG2o     q10 = DifD1 ( q14, q10 )
-          32: MOVL      t12, q14
-          33: TAG2o     q10 = DifD1 ( q14, q10 )
-          34: MOVL      q10, q16
-          35: TAG1o     q16 = PCast10 ( q16 )
-          36: PUTVFo    q16
-          37: ANDB      t12, t10  (-wOSZACP)
-
-          38: INCEIPo   $9
-          39: GETVFo    q18
-          40: TESTVo    q18
-          42: Jnzo      $0x40435A50  (-rOSZACP)
-
-          43: JMPo      $0x40435A5B
-
- - -

Translation from UCode

- -This is all very simple, even though vg_from_ucode.c -is a big file. Position-independent x86 code is generated into -a dynamically allocated array emitted_code; this is -doubled in size when it overflows. Eventually the array is handed -back to the caller of VG_(translate), who must copy -the result into TC and TT, and free the array. - -

-This file is structured into four layers of abstraction, which, -thankfully, are glued back together with extensive -__inline__ directives. From the bottom upwards: - -

    -
  • Address-mode emitters, emit_amode_regmem_reg et al. -

    -

  • Emitters for specific x86 instructions. There are quite a lot of - these, with names such as emit_movv_offregmem_reg. - The v suffix is Intel parlance for a 16/32 bit insn; - there are also b suffixes for 8 bit insns. -

    -

  • The next level up are the synth_* functions, which - synthesise possibly a sequence of raw x86 instructions to do some - simple task. Some of these are quite complex because they have to - work around Intel's silly restrictions on subregister naming. See - synth_nonshiftop_reg_reg for example. -

    -

  • Finally, at the top of the heap, we have - emitUInstr(), - which emits code for a single uinstr. -
- -

-Some comments: -

    -
  • The hack for FPU instructions becomes apparent here. To do a - FPU ucode instruction, we load the simulated FPU's - state from its VG_(baseBlock) into the real FPU - using an x86 frstor insn, do the ucode - FPU insn on the real CPU, and write the updated FPU - state back into VG_(baseBlock) using an - fnsave instruction. This is pretty brutal, but is - simple and it works, and even seems tolerably efficient. There is - no attempt to cache the simulated FPU state in the real FPU over - multiple back-to-back ucode FPU instructions. -

    - FPU_R and FPU_W are also done this way, - with the minor complication that we need to patch in some - addressing mode bits so the resulting insn knows the effective - address to use. This is easy because of the regularity of the x86 - FPU instruction encodings. -

    -

  • An analogous trick is done with ucode insns which claim, in their - flags_r and flags_w fields, that they - read or write the simulated %EFLAGS. For such cases - we first copy the simulated %EFLAGS into the real - %eflags, then do the insn, then, if the insn says it - writes the flags, copy back to %EFLAGS. This is a - bit expensive, which is why the ucode optimisation pass goes to - some effort to remove redundant flag-update annotations. -
- -

-And so ... that's the end of the documentation for the instrumenting -translator! It's really not that complex, because it's composed as a -sequence of simple(ish) self-contained transformations on -straight-line blocks of code. - - -

Top-level dispatch loop

- -Urk. In VG_(toploop). This is basically boring and -unsurprising, not to mention fiddly and fragile. It needs to be -cleaned up. - -

-The only perhaps surprise is that the whole thing is run -on top of a setjmp-installed exception handler, because, -supposing a translation got a segfault, we have to bail out of the -Valgrind-supplied exception handler VG_(oursignalhandler) -and immediately start running the client's segfault handler, if it has -one. In particular we can't finish the current basic block and then -deliver the signal at some convenient future point, because signals -like SIGILL, SIGSEGV and SIGBUS mean that the faulting insn should not -simply be re-tried. (I'm sure there is a clearer way to explain this). - - -

Exceptions, creating new translations

-

Self-modifying code

- -

Lazy updates of the simulated program counter

- -Simulated %EIP is not updated after every simulated x86 -insn as this was regarded as too expensive. Instead ucode -INCEIP insns move it along as and when necessary. -Currently we don't allow it to fall more than 4 bytes behind reality -(see VG_(disBB) for the way this works). -

-Note that %EIP is always brought up to date by the inner -dispatch loop in VG_(dispatch), so that if the client -takes a fault we know at least which basic block this happened in. - - -

The translation cache and translation table

- -

Signals

- -Horrible, horrible. vg_signals.c. -Basically, since we have to intercept all system -calls anyway, we can see when the client tries to install a signal -handler. If it does so, we make a note of what the client asked to -happen, and ask the kernel to route the signal to our own signal -handler, VG_(oursignalhandler). This simply notes the -delivery of signals, and returns. - -

-Every 1000 basic blocks, we see if more signals have arrived. If so, -VG_(deliver_signals) builds signal delivery frames on the -client's stack, and allows their handlers to be run. Valgrind places -in these signal delivery frames a bogus return address, -VG_(signalreturn_bogusRA), and checks all jumps to see -if any jump to it. If so, this is a sign that a signal handler is -returning, and if so Valgrind removes the relevant signal frame from -the client's stack, restores from the signal frame the simulated -state before the signal was delivered, and allows the client to run -onwards. We have to do it this way because some signal handlers never -return, they just longjmp(), which nukes the signal -delivery frame. - -

-The Linux kernel has a different but equally horrible hack for -detecting signal handler returns. Discovering it is left as an -exercise for the reader. - - - -

Errors, error contexts, error reporting, suppressions

-

Client malloc/free

-

Low-level memory management

-

A and V bitmaps

-

Symbol table management

-

Dealing with system calls

-

Namespace management

-

GDB attaching

-

Non-dependence on glibc or anything else

-

The leak detector

-

Performance problems

-

Continuous sanity checking

-

Tracing, or not tracing, child processes

-

Assembly glue for syscalls

- - -
- -

Extensions

- -Some comments about Stuff To Do. - -

Bugs

- -Stephan Kulow and Marc Mutz report problems with kmail in KDE 3 CVS -(RC2 ish) when run on Valgrind. Stephan has it deadlocking; Marc has -it looping at startup. I can't repro either behaviour. Needs -repro-ing and fixing. - - -

Threads

- -Doing a good job of thread support strikes me as almost a -research-level problem. The central issues are how to do fast cheap -locking of the VG_(primary_map) structure, whether or not -accesses to the individual secondary maps need locking, what -race-condition issues result, and whether the already-nasty mess that -is the signal simulator needs further hackery. - -

-I realise that threads are the most-frequently-requested feature, and -I am thinking about it all. If you have guru-level understanding of -fast mutual exclusion mechanisms and race conditions, I would be -interested in hearing from you. - - -

Verification suite

- -Directory tests/ contains various ad-hoc tests for -Valgrind. However, there is no systematic verification or regression -suite, that, for example, exercises all the stuff in -vg_memory.c, to ensure that illegal memory accesses and -undefined value uses are detected as they should be. It would be good -to have such a suite. - - -

Porting to other platforms

- -It would be great if Valgrind was ported to FreeBSD and x86 NetBSD, -and to x86 OpenBSD, if it's possible (doesn't OpenBSD use a.out-style -executables, not ELF ?) - -

-The main difficulties, for an x86-ELF platform, seem to be: - -

    -
  • You'd need to rewrite the /proc/self/maps parser - (vg_procselfmaps.c). - Easy. -

    -

  • You'd need to rewrite vg_syscall_mem.c, or, more - specifically, provide one for your OS. This is tedious, but you - can implement syscalls on demand, and the Linux kernel interface - is, for the most part, going to look very similar to the *BSD - interfaces, so it's really a copy-paste-and-modify-on-demand job. - As part of this, you'd need to supply a new - vg_kerneliface.h file. -

    -

  • You'd also need to change the syscall wrappers for Valgrind's - internal use, in vg_mylibc.c. -
- -All in all, I think a port to x86-ELF *BSDs is not really very -difficult, and in some ways I would like to see it happen, because -that would force a more clear factoring of Valgrind into platform -dependent and independent pieces. Not to mention, *BSD folks also -deserve to use Valgrind just as much as the Linux crew do. - - -

-


- -

Easy stuff which ought to be done

- -

MMX instructions

- -MMX insns should be supported, using the same trick as for FPU insns. -If the MMX registers are not used to copy uninitialised junk from one -place to another in memory, this means we don't have to actually -simulate the internal MMX unit state, so the FPU hack applies. This -should be fairly easy. - - - -

Fix stabs-info reader

- -The machinery in vg_symtab2.c which reads "stabs" style -debugging info is pretty weak. It usually correctly translates -simulated program counter values into line numbers and procedure -names, but the file name is often completely wrong. I think the -logic used to parse "stabs" entries is weak. It should be fixed. -The simplest solution, IMO, is to copy either the logic or simply the -code out of GNU binutils which does this; since GDB can clearly get it -right, binutils (or GDB?) must have code to do this somewhere. - - - - - -

BT/BTC/BTS/BTR

- -These are x86 instructions which test, complement, set, or reset, a -single bit in a word. At the moment they are both incorrectly -implemented and incorrectly instrumented. - -

-The incorrect instrumentation is due to use of helper functions. This -means we lose bit-level definedness tracking, which could wind up -giving spurious uninitialised-value use errors. The Right Thing to do -is to invent a couple of new UOpcodes, I think GET_BIT -and SET_BIT, which can be used to implement all 4 x86 -insns, get rid of the helpers, and give bit-accurate instrumentation -rules for the two new UOpcodes. - -

-I realised the other day that they are mis-implemented too. The x86 -insns take a bit-index and a register or memory location to access. -For registers the bit index clearly can only be in the range zero to -register-width minus 1, and I assumed the same applied to memory -locations too. But evidently not; for memory locations the index can -be arbitrary, and the processor will index arbitrarily into memory as -a result. This too should be fixed. Sigh. Presumably indexing -outside the immediate word is not actually used by any programs yet -tested on Valgrind, for otherwise they (presumably) would simply not -work at all. If you plan to hack on this, first check the Intel docs -to make sure my understanding is really correct. - - - -

Using PREFETCH instructions

- -Here's a small but potentially interesting project for performance -junkies. Experiments with valgrind's code generator and optimiser(s) -suggest that reducing the number of instructions executed in the -translations and mem-check helpers gives disappointingly small -performance improvements. Perhaps this is because performance of -Valgrindified code is limited by cache misses. After all, each read -in the original program now gives rise to at least three reads, one -for the VG_(primary_map), one of the resulting -secondary, and the original. Not to mention, the instrumented -translations are 13 to 14 times larger than the originals. All in all -one would expect the memory system to be hammered to hell and then -some. - -

-So here's an idea. An x86 insn involving a read from memory, after -instrumentation, will turn into ucode of the following form: -

-    ... calculate effective addr, into ta and qa ...
-    TESTVL qa             -- is the addr defined?
-    LOADV (ta), qloaded   -- fetch V bits for the addr
-    LOAD  (ta), tloaded   -- do the original load
-
-At the point where the LOADV is done, we know the actual -address (ta) from which the real LOAD will -be done. We also know that the LOADV will take around -20 x86 insns to do. So it seems plausible that doing a prefetch of -ta just before the LOADV might just avoid a -miss at the LOAD point, and that might be a significant -performance win. - -

-Prefetch insns are notoriously temperamental, more often than not -making things worse rather than better, so this would require -considerable fiddling around. It's complicated because Intels and -AMDs have different prefetch insns with different semantics, so that -too needs to be taken into account. As a general rule, even placing -the prefetches before the LOADV insn is too near the -LOAD; the ideal distance is apparently circa 200 CPU -cycles. So it might be worth having another analysis/transformation -pass which pushes prefetches as far back as possible, hopefully -immediately after the effective address becomes available. - -

-Doing too many prefetches is also bad because they soak up bus -bandwidth / cpu resources, so some cleverness in deciding which loads -to prefetch and which to not might be helpful. One can imagine not -prefetching client-stack-relative (%EBP or -%ESP) accesses, since the stack in general tends to show -good locality anyway. - -

-There's quite a lot of experimentation to do here, but I think it -might make an interesting week's work for someone. - -

-As of 15-ish March 2002, I've started to experiment with this, using -the AMD prefetch/prefetchw insns. - - - -

User-defined permission ranges

- -This is quite a large project -- perhaps a month's hacking for a -capable hacker to do a good job -- but it's potentially very -interesting. The outcome would be that Valgrind could detect a -whole class of bugs which it currently cannot. - -

-The presentation falls into two pieces. - -

-Part 1: user-defined address-range permission setting -

- -Valgrind intercepts the client's malloc, -free, etc calls, watches system calls, and watches the -stack pointer move. This is currently the only way it knows about -which addresses are valid and which not. Sometimes the client program -knows extra information about its memory areas. For example, the -client could at some point know that all elements of an array are -out-of-date. We would like to be able to convey to Valgrind this -information that the array is now addressable-but-uninitialised, so -that Valgrind can then warn if elements are used before they get new -values. - -

-What I would like are some macros like this: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-   VALGRIND_MAKE_WRITABLE(addr, len)
-   VALGRIND_MAKE_READABLE(addr, len)
-
-and also, to check that memory is addressable/initialised, -
-   VALGRIND_CHECK_ADDRESSIBLE(addr, len)
-   VALGRIND_CHECK_INITIALISED(addr, len)
-
- -

-I then include in my sources a header defining these macros, rebuild -my app, run under Valgrind, and get user-defined checks. - -

-Now here's a neat trick. It's a nuisance to have to re-link the app -with some new library which implements the above macros. So the idea -is to define the macros so that the resulting executable is still -completely stand-alone, and can be run without Valgrind, in which case -the macros do nothing, but when run on Valgrind, the Right Thing -happens. How to do this? The idea is for these macros to turn into a -piece of inline assembly code, which (1) has no effect when run on the -real CPU, (2) is easily spotted by Valgrind's JITter, and (3) no sane -person would ever write, which is important for avoiding false matches -in (2). So here's a suggestion: -

-   VALGRIND_MAKE_NOACCESS(addr, len)
-
-becomes (roughly speaking) -
-   movl addr, %eax
-   movl len,  %ebx
-   movl $1,   %ecx   -- 1 describes the action; MAKE_WRITABLE might be
-                     -- 2, etc
-   rorl $13, %ecx
-   rorl $19, %ecx
-   rorl $11, %eax
-   rorl $21, %eax
-
-The rotate sequences have no effect, and it's unlikely they would -appear for any other reason, but they define a unique byte-sequence -which the JITter can easily spot. Using the operand constraints -section at the end of a gcc inline-assembly statement, we can tell gcc -that the assembly fragment kills %eax, %ebx, -%ecx and the condition codes, so this fragment is made -harmless when not running on Valgrind, runs quickly when not on -Valgrind, and does not require any other library support. - - -

-Part 2: using it to detect interference between stack variables -

- -Currently Valgrind cannot detect errors of the following form: -

-void fooble ( void )
-{
-   int a[10];
-   int b[10];
-   a[10] = 99;
-}
-
-Now imagine rewriting this as -
-void fooble ( void )
-{
-   int spacer0;
-   int a[10];
-   int spacer1;
-   int b[10];
-   int spacer2;
-   VALGRIND_MAKE_NOACCESS(&spacer0, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer1, sizeof(int));
-   VALGRIND_MAKE_NOACCESS(&spacer2, sizeof(int));
-   a[10] = 99;
-}
-
-Now the invalid write is certain to hit spacer0 or -spacer1, so Valgrind will spot the error. - -

-There are two complications. - -

-The first is that we don't want to annotate sources by hand, so the -Right Thing to do is to write a C/C++ parser, annotator, prettyprinter -which does this automatically, and run it on post-CPP'd C/C++ source. -See http://www.cacheprof.org for an example of a system which -transparently inserts another phase into the gcc/g++ compilation -route. The parser/prettyprinter is probably not as hard as it sounds; -I would write it in Haskell, a powerful functional language well -suited to doing symbolic computation, with which I am intimately -familiar. There is already a C parser written in Haskell by someone in -the Haskell community, and that would probably be a good starting -point. - -

-The second complication is how to get rid of these -NOACCESS records inside Valgrind when the instrumented -function exits; after all, these refer to stack addresses and will -make no sense whatever when some other function happens to re-use the -same stack address range, probably shortly afterwards. I think I -would be inclined to define a special stack-specific macro -

-   VALGRIND_MAKE_NOACCESS_STACK(addr, len)
-
-which causes Valgrind to record the client's %ESP at the -time it is executed. Valgrind will then watch for changes in -%ESP and discard such records as soon as the protected -area is uncovered by an increase in %ESP. I hesitate -with this scheme only because it is potentially expensive, if there -are hundreds of such records, and considering that changes in -%ESP already require expensive messing with stack access -permissions. - -

-This is probably easier and more robust than for the instrumenter -program to try and spot all exit points for the procedure and place -suitable deallocation annotations there. Plus C++ procedures can -bomb out at any point if they get an exception, so spotting return -points at the source level just won't work at all. - -

-Although some work, it's all eminently doable, and it would make -Valgrind into an even-more-useful tool. - - -

- - -


- -

Cache profiling

-Valgrind is a very nice platform for doing cache profiling and other kinds of -simulation, because it converts horrible x86 instructions into nice clean -RISC-like UCode. For example, for cache profiling we are interested in -instructions that read and write memory; in UCode there are only four -instructions that do this: LOAD, STORE, -FPU_R and FPU_W. By contrast, because of the x86 -addressing modes, almost every instruction can read or write memory.

- -Most of the cache profiling machinery is in the file -vg_cachesim.c.

- -These notes are a somewhat haphazard guide to how Valgrind's cache profiling -works.

- -

Cost centres

-Valgrind gathers cache profiling about every instruction executed, -individually. Each instruction has a cost centre associated with it. -There are two kinds of cost centre: one for instructions that don't reference -memory (iCC), and one for instructions that do -(idCC): - -
-typedef struct _CC {
-   ULong a;
-   ULong m1;
-   ULong m2;
-} CC;
-
-typedef struct _iCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I;
-} iCC;
-   
-typedef struct _idCC {
-   /* word 1 */
-   UChar tag;
-   UChar instr_size;
-   UChar data_size;
-
-   /* words 2+ */
-   Addr instr_addr;
-   CC I; 
-   CC D; 
-} idCC; 
-
- -Each CC has three fields a, m1, -m2 for recording references, level 1 misses and level 2 misses. -Each of these is a 64-bit ULong -- the numbers can get very large, -ie. greater than 4.2 billion allowed by a 32-bit unsigned int.

- -An iCC has one CC for instruction cache accesses. An -idCC has two, one for instruction cache accesses, and one for data -cache accesses.

- -The iCC and idCC structs also store unchanging -information about the instruction: -

    -
  • An instruction-type identification tag (explained below)
  • -

  • Instruction size
  • -

  • Data reference size (idCC only)
  • -

  • Instruction address
  • -

- -Note that data address is not one of the fields for idCC. This is -because for many memory-referencing instructions the data address can change -each time it's executed (eg. if it uses register-offset addressing). We have -to give this item to the cache simulation in a different way (see -Instrumentation section below). Some memory-referencing instructions do always -reference the same address, but we don't try to treat them specially in order to -keep things simple.

- -Also note that there is only room for recording info about one data cache -access in an idCC. So what about instructions that do a read then -a write, such as: - -

inc (%esi)
- -In a write-allocate cache, as simulated by Valgrind, the write cannot miss, -since it immediately follows the read which will drag the block into the cache -if it's not already there. So the write access isn't really interesting, and -Valgrind doesn't record it. This means that Valgrind doesn't measure -memory references, but rather memory references that could miss in the cache. -This behaviour is the same as that used by the AMD Athlon hardware counters. -It also has the benefit of simplifying the implementation -- instructions that -read and write memory can be treated like instructions that read memory.

- -

Storing cost-centres

-Cost centres are stored in a way that makes them very cheap to look up, which is -important since one is looked up for every original x86 instruction -executed.

- -Valgrind does JIT translations at the basic block level, and cost centres are -also set up and stored at the basic block level. By doing things carefully, we -store all the cost centres for a basic block in a contiguous array, and lookup -comes almost for free.

- -Consider this part of a basic block (for exposition purposes, pretend it's an -entire basic block): - -

-movl $0x0,%eax
-movl $0x99, -4(%ebp)
-
- -The translation to UCode looks like this: - -
-MOVL      $0x0, t20
-PUTL      t20, %EAX
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-STL       t18, (t14)
-INCEIPo   $7
-
- -The first step is to allocate the cost centres. This requires a preliminary -pass to count how many x86 instructions were in the basic block, and their -types (and thus sizes). UCode translations for single x86 instructions are -delimited by the INCEIPo instruction, the argument of which gives -the byte size of the instruction (note that lazy INCEIP updating is turned off -to allow this).

- -We can tell if an x86 instruction references memory by looking for -LDL and STL UCode instructions, and thus what kind of -cost centre is required. From this we can determine how many cost centres we -need for the basic block, and their sizes. We can then allocate them in a -single array.

- -Consider the example code above. After the preliminary pass, we know we need -two cost centres, one iCC and one idCC. So we -allocate an array to store these which looks like this: - -

-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-
-|(uninit)|      tag         (1 byte)
-|(uninit)|      instr_size  (1 byte)
-|(uninit)|      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|(uninit)|      instr_addr  (4 bytes)
-|(uninit)|      I.a         (8 bytes)
-|(uninit)|      I.m1        (8 bytes)
-|(uninit)|      I.m2        (8 bytes)
-|(uninit)|      D.a         (8 bytes)
-|(uninit)|      D.m1        (8 bytes)
-|(uninit)|      D.m2        (8 bytes)
-
- -(We can see now why we need tags to distinguish between the two types of cost -centres.)

- -We also record the size of the array. We look up the debug info of the first -instruction in the basic block, and then stick the array into a table indexed -by filename and function name. This makes it easy to dump the information -quickly to file at the end.

- -

Instrumentation

-The instrumentation pass has two main jobs: - -
    -
  1. Fill in the gaps in the allocated cost centres.
  2. -

  3. Add UCode to call the cache simulator for each instruction.
  4. -

- -The instrumentation pass steps through the UCode and the cost centres in -tandem. As each original x86 instruction's UCode is processed, the appropriate -gaps in the instruction's cost centre are filled in, for example: - -
-|INSTR_CC|      tag         (1 byte)
-|5       |      instr_size  (1 byte)
-|(uninit)|      (padding)   (2 bytes)
-|i_addr1 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-
-|WRITE_CC|      tag         (1 byte)
-|7       |      instr_size  (1 byte)
-|4       |      data_size   (1 byte)
-|(uninit)|      (padding)   (1 byte)
-|i_addr2 |      instr_addr  (4 bytes)
-|0       |      I.a         (8 bytes)
-|0       |      I.m1        (8 bytes)
-|0       |      I.m2        (8 bytes)
-|0       |      D.a         (8 bytes)
-|0       |      D.m1        (8 bytes)
-|0       |      D.m2        (8 bytes)
-
- -(Note that this step is not performed if a basic block is re-translated; see -here for more information.)

- -GCC inserts padding before the instr_addr field so that it is word -aligned.

- -The instrumentation added to call the cache simulation function looks like this -(instrumentation is indented to distinguish it from the original UCode): - -

-MOVL      $0x0, t20
-PUTL      t20, %EAX
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  MOVL      $0x4091F8A4, t46  # address of 1st CC
-  PUSHL     t46
-  CALLMo    $0x12             # first cachesim function
-  CLEARo    $0x4
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $5
-
-LEA1L     -4(t4), t14
-MOVL      $0x99, t18
-  MOVL      t14, t42
-STL       t18, (t14)
-  PUSHL     %eax
-  PUSHL     %ecx
-  PUSHL     %edx
-  PUSHL     t42
-  MOVL      $0x4091F8C4, t44  # address of 2nd CC
-  PUSHL     t44
-  CALLMo    $0x13             # second cachesim function
-  CLEARo    $0x8
-  POPL      %edx
-  POPL      %ecx
-  POPL      %eax
-INCEIPo   $7
-
- -Consider the first instruction's UCode. Each call is surrounded by three -PUSHL and POPL instructions to save and restore the -caller-save registers. Then the address of the instruction's cost centre is -pushed onto the stack, to be the first argument to the cache simulation -function. The address is known at this point because we are doing a -simultaneous pass through the cost centre array. This means the cost centre -lookup for each instruction is almost free (just the cost of pushing an -argument for a function call). Then the call to the cache simulation function -for non-memory-reference instructions is made (note that the -CALLMo UInstruction takes an offset into a table of predefined -functions; it is not an absolute address), and the single argument is -CLEARed from the stack.

- -The second instruction's UCode is similar. The only difference is that, as -mentioned before, we have to pass the address of the data item referenced to -the cache simulation function too. This explains the MOVL t14, -t42 and PUSHL t42 UInstructions. (Note that the seemingly -redundant MOVing will probably be optimised away during register -allocation.)

- -Note that instead of storing unchanging information about each instruction -(instruction size, data size, etc) in its cost centre, we could have passed in -these arguments to the simulation function. But this would slow the calls down -(two or three extra arguments pushed onto the stack). Also it would bloat the -UCode instrumentation by amounts similar to the space required for them in the -cost centre; bloated UCode would also fill the translation cache more quickly, -requiring more translations for large programs and slowing them down more.

- - -

Handling basic block retranslations

-The above description ignores one complication. Valgrind has a limited size -cache for basic block translations; if it fills up, old translations are -discarded. If a discarded basic block is executed again, it must be -re-translated.

- -However, we can't use this approach for profiling -- we can't throw away cost -centres for instructions in the middle of execution! So when a basic block is -translated, we first look for its cost centre array in the hash table. If -there is no cost centre array, it must be the first translation, so we proceed -as described above. But if there is a cost centre array already, it must be a -retranslation. In this case, we skip the cost centre allocation and -initialisation steps, but still do the UCode instrumentation step.

- -

The cache simulation

-The cache simulation is fairly straightforward. It just tracks which memory -blocks are in the cache at the moment (it doesn't track the contents, since -that is irrelevant).

- -The interface to the simulation is quite clean. The functions called from the -UCode contain calls to the simulation functions in the files -vg_cachesim_{I1,D1,L2}.c; these calls are inlined so that only -one function call is done per simulated x86 instruction. The file -vg_cachesim.c simply #includes the three files -containing the simulation, which makes plugging in new cache simulations -very easy -- you just replace the three files and recompile.

- -

Output

-Output is fairly straightforward, basically printing the cost centre for every -instruction, grouped by files and functions. Total counts (eg. total cache -accesses, total L1 misses) are calculated when traversing this structure rather -than during execution, to save time; the cache simulation functions are called -so often that even one or two extra adds can make a sizeable difference.

- -Input file has the following format: - -

-file         ::= desc_line* cmd_line events_line data_line+ summary_line
-desc_line    ::= "desc:" ws? non_nl_string
-cmd_line     ::= "cmd:" ws? cmd
-events_line  ::= "events:" ws? (event ws)+
-data_line    ::= file_line | fn_line | count_line
-file_line    ::= ("fl=" | "fi=" | "fe=") filename
-fn_line      ::= "fn=" fn_name
-count_line   ::= line_num ws? (count ws)+
-summary_line ::= "summary:" ws? (count ws)+
-count        ::= num | "."
-
- -Where: - -
    -
  • non_nl_string is any string not containing a newline.
  • -

  • cmd is a command line invocation.
  • -

  • filename and fn_name can be anything.
  • -

  • num and line_num are decimal numbers.
  • -

  • ws is whitespace.
  • -

  • nl is a newline.
  • -

- -The contents of the "desc:" lines are printed out at the top of the summary. -This is a generic way of providing simulation specific information, eg. for -giving the cache configuration for cache simulation.

- -Counts can be "." to represent "N/A", eg. the number of write misses for an -instruction that doesn't write to memory.

- -The number of counts in each line and the -summary_line should not exceed the number of events in the -events_line. If the number in each line is less, -vg_annotate treats those missing as though they were a "." entry.

- -A file_line changes the current file name. A fn_line -changes the current function name. A count_line contains counts -that pertain to the current filename/fn_name. A "fl=" file_line -and a fn_line must appear before any count_lines to -give the context of the first count_lines.

- -Each file_line should be immediately followed by a -fn_line. "fi=" file_lines are used to switch -filenames for inlined functions; "fe=" file_lines are similar, but -are put at the end of a basic block in which the file name hasn't been switched -back to the original file name. (fi and fe lines behave the same, they are -only distinguished to help debugging.)

- - -

Summary of performance features

-Quite a lot of work has gone into making the profiling as fast as possible. -This is a summary of the important features: - -
    -
  • The basic block-level cost centre storage allows almost free cost centre - lookup.
  • - -

  • Only one function call is made per instruction simulated; even this - accounts for a sizeable percentage of execution time, but it seems - unavoidable if we want flexibility in the cache simulator.
  • - -

  • Unchanging information about an instruction is stored in its cost centre, - avoiding unnecessary argument pushing, and minimising UCode - instrumentation bloat.
  • - -

  • Summary counts are calculated at the end, rather than during - execution.
  • - -

  • The cachegrind.out output files can contain huge amounts of - information; file format was carefully chosen to minimise file - sizes.
  • -

- - -

Annotation

-Annotation is done by vg_annotate. It is a fairly straightforward Perl script -that slurps up all the cost centres, and then runs through all the chosen -source files, printing out cost centres with them. It too has been carefully -optimised. - - -

Similar work, extensions

-It would be relatively straightforward to do other simulations and obtain -line-by-line information about interesting events. A good example would be -branch prediction -- all branches could be instrumented to interact with a -branch prediction simulator, using very similar techniques to those described -above.

- -In particular, vg_annotate would not need to change -- the file format is such -that it is not specific to the cache simulation, but could be used for any kind -of line-by-line information. The only part of vg_annotate that is specific to -the cache simulation is the name of the input file -(cachegrind.out), although it would be very simple to add an -option to control this.

- - - diff --git a/none/Makefile.am b/none/Makefile.am deleted file mode 100644 index 60553ddac6..0000000000 --- a/none/Makefile.am +++ /dev/null @@ -1,110 +0,0 @@ -SUBDIRS = demangle . docs tests - -CFLAGS = $(WERROR) -DVG_LIBDIR="\"$(libdir)"\" \ - -Winline -Wall -Wshadow -O -fomit-frame-pointer -g - -valdir = $(libdir)/valgrind - -LDFLAGS = -Wl,-z -Wl,initfirst - -INCLUDES = -I$(srcdir)/demangle - -bin_SCRIPTS = valgrind cachegrind vg_annotate - -SUPP_FILES = glibc-2.1.supp glibc-2.2.supp xfree-3.supp xfree-4.supp - -val_DATA = $(SUPP_FILES) default.supp - -BUILT_SOURCES = default.supp - -default.supp: $(SUPP_FILES) - -bzdist: dist - gunzip -c $(PACKAGE)-$(VERSION).tar.gz | bzip2 > $(PACKAGE)-$(VERSION).tar.bz2 - -EXTRA_DIST = $(val_DATA) \ - PATCHES_APPLIED ACKNOWLEDGEMENTS \ - README_KDE3_FOLKS README_PACKAGERS \ - README_MISSING_SYSCALL_OR_IOCTL TODO dosyms vg_libpthread.vs \ - valgrind.spec valgrind.spec.in - -val_PROGRAMS = valgrind.so valgrinq.so libpthread.so - -libpthread_so_SOURCES = vg_libpthread.c vg_libpthread_unimp.c - -valgrinq_so_SOURCES = vg_valgrinq_dummy.c - -valgrind_so_SOURCES = \ - vg_clientfuncs.c \ - vg_scheduler.c \ - vg_cachesim.c \ - vg_clientmalloc.c \ - vg_clientperms.c \ - vg_demangle.c \ - vg_dispatch.S \ - vg_errcontext.c \ - vg_execontext.c \ - vg_from_ucode.c \ - vg_helpers.S \ - vg_main.c \ - vg_malloc2.c \ - vg_memory.c \ - vg_messages.c \ - vg_mylibc.c \ - vg_procselfmaps.c \ - vg_profile.c \ - vg_signals.c \ - vg_startup.S \ - vg_symtab2.c \ - vg_syscall_mem.c \ - vg_syscall.S \ - vg_to_ucode.c \ - vg_translate.c \ - vg_transtab.c \ - vg_vtagops.c - -valgrind_so_LDADD = \ - demangle/cp-demangle.o \ - demangle/cplus-dem.o \ - demangle/dyn-string.o \ - demangle/safe-ctype.o - -include_HEADERS = valgrind.h - -noinst_HEADERS = \ - vg_cachesim_gen.c \ - vg_cachesim_I1.c \ - vg_cachesim_D1.c \ - vg_cachesim_L2.c \ - vg_kerneliface.h \ - vg_include.h \ - vg_constants.h \ - vg_unsafe.h - -MANUAL_DEPS = $(noinst_HEADERS) 
$(include_HEADERS) - -vg_memory.o: vg_memory.c $(MANUAL_DEPS) - $(COMPILE) -O2 @PREFERRED_STACK_BOUNDARY@ -c $< - -vg_clientfuncs.o: vg_clientfuncs.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -vg_libpthread.o: vg_libpthread.c $(MANUAL_DEPS) - $(COMPILE) -fno-omit-frame-pointer -c $< - -valgrind.so$(EXEEXT): $(valgrind_so_OBJECTS) - $(CC) $(CFLAGS) $(LDFLAGS) -shared -o valgrind.so \ - $(valgrind_so_OBJECTS) $(valgrind_so_LDADD) - -valgrinq.so$(EXEEXT): $(valgrinq_so_OBJECTS) - $(CC) $(CFLAGS) -shared -o valgrinq.so $(valgrinq_so_OBJECTS) - -libpthread.so$(EXEEXT): $(libpthread_so_OBJECTS) $(srcdir)/vg_libpthread.vs - $(CC) -Wall -Werror -g -O -shared -fpic -o libpthread.so \ - $(libpthread_so_OBJECTS) \ - -Wl,-version-script $(srcdir)/vg_libpthread.vs - -install-exec-hook: - $(mkinstalldirs) $(DESTDIR)$(valdir) - rm -f $(DESTDIR)$(valdir)/libpthread.so.0 - $(LN_S) libpthread.so $(DESTDIR)$(valdir)/libpthread.so.0