]> git.ipfire.org Git - thirdparty/valgrind.git/commitdiff
Fix 338252 - building valgrind with -flto (link time optimisation) fails
authorPhilippe Waroquiers <philippe.waroquiers@skynet.be>
Sun, 11 Mar 2018 21:26:08 +0000 (22:26 +0100)
committerPhilippe Waroquiers <philippe.waroquiers@skynet.be>
Sun, 18 Mar 2018 12:53:38 +0000 (13:53 +0100)
* Addition of a new configure option --enable-lto=yes or --enable-lto=no
  Default value is --enable-lto=no, as an lto build is significantly slower
  and so is not appropriate for valgrind development: --enable-lto=yes
  should be used only on buildbots and/or by packagers.

* Some files containing asm functions have to be compiled without lto:
    coregrind/m_libcsetjmp.c
    coregrind/m_main.c
  If these are compiled with lto, that gives undefined symbols at link time.
  The files to compile without lto are
    coregrind/m_libcsetjmp.c
    coregrind/m_main.c

  To compile these files with other options, a noinst target lib is defined.
  The objects of this library are then added to the libcoregrind.

* memcheck/mc_main.c : move the handwritten asm helpers to mc_main_asm.c.
  This avoids undefined symbols on some toolchains. Due to this,
  the preprocessor symbols that activate the fast or asm memcheck helpers
  are moved to mc_include.h
  Platforms with handwritten helpers will also have the memcheck primary
  map defined non static.

* In VEX, auxprogs/genoffsets.c also has to be compiled without lto,
  as the asm produced by the compiler is post-processed to produce
  pub/libvex_guest_offsets.h. lto not producing asm means the generation
  fails if we used -flto to compile this file.

* all the various Makefile*am are modified to use LTO_CFLAGS for
  (most) targets. LTO_CFLAGS is empty when --enable-lto=no,
  otherwise is set to the flags needed for gcc.
  If --enable-lto=no, LTO_AR and LTO_RANLIB are the standard AR and RANLIB,
  otherwise they are the lto capable versions (gcc-ar and gcc-ranlib).

* This has been tested on:
    debian 9.4/gcc 6.3.0/amd64+x86
    rhel 7.4/gcc 6.4.0/amd64
    ubuntu 17.10/gcc 7.2.0/amd64+x86
    fedora26/gcc 7.3.1/s390x

  No regressions on the above.

20 files changed:
Makefile.vex.am
NEWS
README_PACKAGERS
cachegrind/Makefile.am
callgrind/Makefile.am
configure.ac
coregrind/Makefile.am
coregrind/m_libcsetjmp.c
drd/Makefile.am
exp-bbv/Makefile.am
exp-dhat/Makefile.am
exp-sgcheck/Makefile.am
helgrind/Makefile.am
lackey/Makefile.am
massif/Makefile.am
memcheck/Makefile.am
memcheck/mc_include.h
memcheck/mc_main.c
memcheck/mc_main_asm.c [new file with mode: 0644]
none/Makefile.am

index 4ad5ffa67f566fa50cf8396d323540a287599700..64f837352b86e6219c66e55c102ac5a0322596aa 100644 (file)
@@ -68,6 +68,10 @@ else
 CFLAGS_FOR_GENOFFSETS = $(CFLAGS)
 endif
 
+# for VEX building, use the LTO versions, in case they differ from non lto versions
+AR = ${LTO_AR}
+RANLIB = ${LTO_RANLIB}
+
 # This is very uggerly.  Need to sed out both "xyzzyN" and
 # "xyzzy$N" since gcc on different targets emits the constants
 # differently -- with a leading $ on x86/amd64 but none on ppc32/64.
@@ -86,7 +90,7 @@ pub/libvex_guest_offsets.h: auxprogs/genoffsets.c \
        rm -f auxprogs/genoffsets.s
        $(mkdir_p) auxprogs pub
        $(CC) $(CFLAGS_FOR_GENOFFSETS) \
-             $(LIBVEX_CFLAGS) \
+             $(LIBVEX_CFLAGS_NO_LTO) \
              $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) \
                                -O -S -o auxprogs/genoffsets.s \
                                         $(srcdir)/auxprogs/genoffsets.c
@@ -162,10 +166,13 @@ LIBVEX_SOURCES_COMMON = \
 
 LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c
 
-LIBVEX_CFLAGS = \
+LIBVEX_CFLAGS_NO_LTO = \
        -Wbad-function-cast \
        -fstrict-aliasing
 
+LIBVEX_CFLAGS = ${LTO_CFLAGS} \
+       ${LIBVEX_CFLAGS_NO_LTO}
+
 libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES  = $(LIBVEX_SOURCES_COMMON)
 libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CPPFLAGS = \
         $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -Ipriv
diff --git a/NEWS b/NEWS
index 3d04b75943098a9700a68551f525151ee409d897..f762530a92ae84013469cb126d547fe5091fa042 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -35,6 +35,11 @@ support for X86/macOS 10.13, AMD64/macOS 10.13.
 
 * ==================== OTHER CHANGES ====================
 
+* A new configure option --enable-lto=yes allows building Valgrind
+  with link time optimisation. If the toolchain supports it,
+  this produces a smaller/faster Valgrind (up to 10%).
+  Note that if you are doing some Valgrind development, --enable-lto=yes
+  significantly slows down the build.
 
 * ==================== FIXED BUGS ====================
 
@@ -52,6 +57,7 @@ where XXXXXX is the bug number as listed below.
 79362   Debug info is lost for .so files when they are dlclose'd
 208052  strlcpy error when n = 0
 255603  exp-sgcheck Assertion '!already_present' failed
+338252  building valgrind with -flto (link time optimisation) fails
 376257  helgrind history full speed up using a cached stack
 379373  Fix syscall param msg->desc.port.name points to uninitialised byte(s)
         on macOS 10.12
@@ -91,7 +97,7 @@ where XXXXXX is the bug number as listed below.
 387712  s390x cgijnl reports Conditional jump depends on uninitialised value
 387773  .gnu_debugaltlink paths resolve relative to .debug file, not symlink
 388862  Add replacements for wmemchr and wcsnlen on Linux
-389373  exp-sgcheck the 'impossible' happened as st_LoadG is not instrumented
+389373  exp-sgcheck the 'impossible' happened as Ist_LoadG is not instrumented
 389065  valgrind meets gcc flag -Wlogical-op
 390723  make xtree dump files world wide readable, similar to log files
 
index 28ee8ba129d0c54594542b7555410cfc8c52b20f..e86c15309bd757eeefc1ce05f498134ad3348d82 100644 (file)
@@ -5,6 +5,9 @@ building binary distributions of Valgrind.
 Thanks for taking the time and effort to make a binary distribution of
 Valgrind.  The following notes may save you some trouble.
 
+-- If your toolchain (compiler, linker) supports lto, using the configure
+   option --enable-lto=yes will produce a smaller/faster valgrind
+   (up to 10%).
 
 -- Do not ship your Linux distro with a completely stripped
    /lib/ld.so.  At least leave the debugging symbol names on -- line
@@ -28,7 +31,7 @@ Valgrind.  The following notes may save you some trouble.
    not any debuginfo or extra symbols for any other libraries.
 
 
--- (Unfortunate but true) When you configure to build with the 
+-- (Unfortunate but true) When you configure to build with the
    --prefix=/foo/bar/xyzzy option, the prefix /foo/bar/xyzzy gets
    baked into valgrind.  The consequence is that you _must_ install
    valgrind at the location specified in the prefix.  If you don't,
index a587d2412459419c88e23e296197949b4490aed1..f8447a17ce5adb92f2e58e5c8bb8b97969b30f62 100644 (file)
@@ -53,7 +53,7 @@ cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(CACHEGRIND_SOURCES_COMMON)
 cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -73,7 +73,7 @@ cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(CACHEGRIND_SOURCES_COMMON)
 cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS)\
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 56e8a04c239733514006e3dcae95444ad8088a1c..7cecb44b3e788a8dc34b39676470fca25dc66f07 100644 (file)
@@ -53,7 +53,7 @@ callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(CALLGRIND_SOURCES_COMMON)
 callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(CALLGRIND_CFLAGS_COMMON)
 callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -73,7 +73,7 @@ callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(CALLGRIND_SOURCES_COMMON)
 callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(CALLGRIND_CFLAGS_COMMON)
 callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 3988d62c63197277609f5e2aee4cdb48bd6d68b1..03704981de8ba5777c14db1d997ac4c281757a9b 100644 (file)
@@ -45,6 +45,12 @@ AC_PROG_CXX
 #         AC_SUBST([OBJCFLAGS])
 #        ])
 AC_PROG_RANLIB
+# Set LTO_RANLIB variable to an lto enabled ranlib
+if test "x$LTO_RANLIB" = "x"; then
+  AC_PATH_PROGS([LTO_RANLIB], [gcc-ranlib])
+fi
+AC_ARG_VAR([LTO_RANLIB],[Library indexer command for link time optimisation])
+
 # provide a very basic definition for AC_PROG_SED if it's not provided by
 # autoconf (as e.g. in autoconf 2.59).
 m4_ifndef([AC_PROG_SED],
@@ -60,6 +66,13 @@ if test "x$AR" = "x"; then
 fi
 AC_ARG_VAR([AR],[Archiver command])
 
+# same for LTO_AR variable for lto enabled archiver
+if test "x$LTO_AR" = "x"; then
+  AC_PATH_PROGS([LTO_AR], [gcc-ar])
+fi
+AC_ARG_VAR([LTO_AR],[Archiver command for link time optimisation])
+
+
 # Check for the compiler support
 if test "${GCC}" != "yes" ; then
    AC_MSG_ERROR([Valgrind relies on GCC to be compiled])
@@ -1945,6 +1958,55 @@ AM_CONDITIONAL(HAS_MLONG_DOUBLE_128, test x$ac_compiler_supports_mlong_double_12
 FLAG_MLONG_DOUBLE_128="-mlong-double-128"
 AC_SUBST(FLAG_MLONG_DOUBLE_128)
 
+# does this toolchain support lto ?
+# Not checked for if --enable-lto=no was given, or if LTO_AR or LTO_RANLIB
+# are not defined.
+# If no --enable-lto=* arg is provided, default to no, as lto builds are
+# a lot slower, and so not appropriate for Valgrind development.
+# --enable-lto=yes should be used by distro packagers.
+AC_CACHE_CHECK([for using the link time optimisation], vg_cv_lto,
+   [AC_ARG_ENABLE(lto,
+      [  --enable-lto          enables building with link time optimisation],
+      [vg_cv_lto=$enableval],
+      [vg_cv_lto=no])])
+
+if test "x${vg_cv_lto}" != "xno" -a "x${LTO_AR}" != "x" -a "x${LTO_RANLIB}" != "x"; then
+AC_MSG_CHECKING([if toolchain accepts lto])
+safe_CFLAGS=$CFLAGS
+TEST_LTO_CFLAGS="-flto -flto-partition=one -fuse-linker-plugin"
+# Note : using 'one' partition is giving a slightly smaller/faster memcheck
+# and ld/lto-trans1 still needs a reasonable memory (about 0.5GB) when linking.
+CFLAGS="$TEST_LTO_CFLAGS -Werror"
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+  extern void somefun(void);
+  somefun();
+  return 0;
+]])], [
+LTO_CFLAGS=$TEST_LTO_CFLAGS
+AC_MSG_RESULT([yes])
+], [
+LTO_CFLAGS=""
+AC_MSG_RESULT([no])
+])
+CFLAGS=$safe_CFLAGS
+fi
+
+AC_SUBST(LTO_CFLAGS)
+
+# if we could not compile with lto args, or lto was disabled,
+# then set LTO_AR/LTO_RANLIB to the non lto values
+# Also define ENABLE_LTO in config.h (not needed by the code currently, but
+# this guarantees we recompile everything if we re-configure and rebuild
+# in a build dir previously built with another value of --enable-lto).
+if test "x${LTO_CFLAGS}" = "x"; then
+   LTO_AR=${AR}
+   LTO_RANLIB=${RANLIB}
+   vg_cv_lto=no
+else
+   vg_cv_lto=yes
+   AC_DEFINE([ENABLE_LTO], 1, [configured to build with lto link time optimisation])
+fi
 
 # Convenience function to check whether GCC supports a particular
 # warning option. Takes two arguments,
@@ -4585,6 +4647,7 @@ cat<<EOF
          Primary build arch: ${VGCONF_ARCH_PRI}
        Secondary build arch: ${VGCONF_ARCH_SEC}
                    Build OS: ${VGCONF_OS}
+     Link Time Optimisation: ${vg_cv_lto}
        Primary build target: ${VGCONF_PLATFORM_PRI_CAPS}
      Secondary build target: ${VGCONF_PLATFORM_SEC_CAPS}
            Platform variant: ${VGCONF_PLATVARIANT}
index e55ab9aa792406cf09dbca9b0d65fc54294de4d3..4acacae00b9c69ac981be7905def04c48c13fd11 100644 (file)
@@ -50,8 +50,12 @@ valgrind_SOURCES = \
        m_debuglog.c
 endif
 
+# for valgrind coregrind building, use the LTO versions, in case they differ from non lto versions
+AR = ${LTO_AR}
+RANLIB = ${LTO_RANLIB}
+
 valgrind_CPPFLAGS  = $(AM_CPPFLAGS_PRI)
-valgrind_CFLAGS    = $(AM_CFLAGS_PRI)
+valgrind_CFLAGS    = $(AM_CFLAGS_PRI) $(LTO_CFLAGS)
 valgrind_CCASFLAGS = $(AM_CCASFLAGS_PRI)
 valgrind_LDFLAGS   = $(AM_CFLAGS_PRI) @LIB_UBSAN@
 # If there is no secondary platform, and the platforms include x86-darwin,
@@ -90,7 +94,7 @@ vgdb_SOURCES += vgdb-invoker-solaris.c
 endif
 
 vgdb_CPPFLAGS  = $(AM_CPPFLAGS_PRI)
-vgdb_CFLAGS    = $(AM_CFLAGS_PRI)
+vgdb_CFLAGS    = $(AM_CFLAGS_PRI) $(LTO_CFLAGS)
 vgdb_CCASFLAGS = $(AM_CCASFLAGS_PRI)
 vgdb_LDFLAGS   = $(AM_CFLAGS_PRI) @LIB_UBSAN@
 if VGCONF_PLATVARIANT_IS_ANDROID
@@ -308,10 +312,8 @@ COREGRIND_SOURCES_COMMON = \
        m_libcfile.c \
        m_libcprint.c \
        m_libcproc.c \
-       m_libcsetjmp.c \
        m_libcsignal.c \
        m_machine.c \
-       m_main.c \
        m_mallocfree.c \
        m_options.c \
        m_oset.c \
@@ -461,13 +463,43 @@ COREGRIND_SOURCES_COMMON = \
        m_ume/main.c \
        m_ume/script.c
 
+# The below files cannot be compiled with lto, otherwise that gives
+# undefined symbols at link time. So, define a noinst library to
+# build the needed .o with specific flags.
+# These objects are added to the libcoregrind library.
+NOLTO_COREGRIND_SOURCES_COMMON = \
+       m_libcsetjmp.c \
+       m_main.c
+noinst_LIBRARIES = libnolto_coregrind-@VGCONF_ARCH_PRI@-@VGCONF_OS@.a
+libnolto_coregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES = \
+    $(NOLTO_COREGRIND_SOURCES_COMMON)
+libnolto_coregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CPPFLAGS = \
+    $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+libnolto_coregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CFLAGS = \
+    $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+libnolto_coregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CCASFLAGS = \
+    $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
+
+if VGCONF_HAVE_PLATFORM_SEC
+noinst_LIBRARIES += libnolto_coregrind-@VGCONF_ARCH_SEC@-@VGCONF_OS@.a
+libnolto_coregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_SOURCES = \
+    $(NOLTO_COREGRIND_SOURCES_COMMON)
+libnolto_coregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CPPFLAGS = \
+    $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+libnolto_coregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CFLAGS = \
+    $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+libnolto_coregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CCASFLAGS = \
+    $(AM_CCASFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
+endif
+
+
 libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES = \
     $(COREGRIND_SOURCES_COMMON)
 nodist_libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES = \
     $(BUILT_SOURCES)
 libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CPPFLAGS = \
     $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CFLAGS = \
+libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CFLAGS = $(LTO_CFLAGS) \
     $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CCASFLAGS = \
     $(AM_CCASFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -477,6 +509,8 @@ libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES += \
 libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CFLAGS += \
     -DENABLE_LINUX_TICKET_LOCK
 endif
+libcoregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_LIBADD = \
+    $(libnolto_coregrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_OBJECTS)
 
 if VGCONF_HAVE_PLATFORM_SEC
 libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_SOURCES = \
@@ -485,7 +519,7 @@ nodist_libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_SOURCES = \
     $(BUILT_SOURCES)
 libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CPPFLAGS = \
     $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CFLAGS = \
+libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CFLAGS =  $(LTO_CFLAGS) \
     $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CCASFLAGS = \
     $(AM_CCASFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
@@ -495,6 +529,8 @@ libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_SOURCES += \
 libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_CFLAGS += \
     -DENABLE_LINUX_TICKET_LOCK
 endif
+libcoregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_LIBADD = \
+    $(libnolto_coregrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_a_OBJECTS)
 endif
 
 #----------------------------------------------------------------------------
index 68c101e8cb3c445fb3794fc50b48760a902184ee..c731806402f2e515f89845421385e8621b9dd991 100644 (file)
@@ -2,7 +2,7 @@
 /*--------------------------------------------------------------------*/
 /*--- A minimal setjmp/longjmp implementation.      m_libcsetjmp.c ---*/
 /*--------------------------------------------------------------------*/
+
 /*
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
 /* Contributed by Julian Seward <jseward@acm.org> */
 
+/* This file must be compiled without link time optimisation, as otherwise
+   the asm functions below become undefined references at link time for
+   unclear reasons. */
 
 #include "pub_core_basics.h"
 #include "pub_core_libcsetjmp.h"    /* self */
 
-
 /* See include/pub_tool_libcsetjmp.h for background and rationale. */
 
 /* The alternative implementations are for ppc{32,64}-linux and
@@ -689,7 +691,6 @@ __asm__(
 ".previous                      \n\t"
 );
 #endif  /* VGP_mips64_linux */
-
 /*--------------------------------------------------------------------*/
 /*--- end                                                          ---*/
 /*--------------------------------------------------------------------*/
index 9ba4c58e00ad44239a98cf94616b4fe066a23511..93e2507c4247012dd41bc4f8a664e075cafacdcd 100644 (file)
@@ -73,7 +73,7 @@ drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(DRD_SOURCES_COMMON)
 drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(DRD_CFLAGS)
 drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -93,7 +93,7 @@ drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(DRD_SOURCES_COMMON)
 drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(DRD_CFLAGS)
 drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 78d9e53287cfdeff18cb02391e270c190dffa51b..8cb5c3025ea38c373ffe112be268c778695f0c1b 100644 (file)
@@ -17,7 +17,7 @@ exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(BBV_SOURCES_COMMON)
 exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -37,7 +37,7 @@ exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(BBV_SOURCES_COMMON)
 exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 647b72bb28f0a7afa3cdff8a7b546f1c2698f3c3..b74529858df6e1b2c7cc0b9b7fb40bfebc1553fd 100644 (file)
@@ -25,7 +25,7 @@ exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(EXP_DHAT_SOURCES_COMMON)
 exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -45,7 +45,7 @@ exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(EXP_DHAT_SOURCES_COMMON)
 exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 211a5ffaddcb7aa1c47da1eb1590d150dbebcd6a..8927ff630b5e2e0e263f2e024219b2dc6e4ec09d 100644 (file)
@@ -30,7 +30,7 @@ exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(EXP_PTRCHECK_SOURCES_COMMON)
 exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -50,7 +50,7 @@ exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(EXP_PTRCHECK_SOURCES_COMMON)
 exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index b6f1b894bbd97a8a07bbd6123da932bde4e63537..6865ef2c3bb81987c498b4cd4089665961899006 100644 (file)
@@ -41,7 +41,7 @@ helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(HELGRIND_SOURCES_COMMON)
 helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -O2
 helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -61,7 +61,7 @@ helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(HELGRIND_SOURCES_COMMON)
 helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -O2
 helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 39ada21fddc6715948f8a6f14700959285edb780..a63023278ebb992f292da38fe800d5bd927a8ffa 100644 (file)
@@ -17,7 +17,7 @@ lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(LACKEY_SOURCES_COMMON)
 lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -37,7 +37,7 @@ lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(LACKEY_SOURCES_COMMON)
 lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 29334035d460b186c9f137ee4a7dc452d26fe88b..f631831b2be1c7d79da9957bdeea2a30935e8b87 100644 (file)
@@ -25,7 +25,7 @@ massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(MASSIF_SOURCES_COMMON)
 massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -45,7 +45,7 @@ massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(MASSIF_SOURCES_COMMON)
 massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 721cb21eb05d7871f4cfb95c0b5fe81f44d3a6f6..4a9512d10c77a60951307701f65a423d2c5ffe63 100644 (file)
@@ -24,7 +24,7 @@ endif
 MEMCHECK_SOURCES_COMMON = \
        mc_leakcheck.c \
        mc_malloc_wrappers.c \
-       mc_main.c \
+       mc_main.c mc_main_asm.c \
        mc_translate.c \
        mc_machine.c \
        mc_errors.c
@@ -33,7 +33,7 @@ memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(MEMCHECK_SOURCES_COMMON)
 memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       =  $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -O2
 memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -53,7 +53,7 @@ memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(MEMCHECK_SOURCES_COMMON)
 memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -O2
 memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)
index 765634b9120ecd6e471c4c59c05d8dbb35970323..7b4c555cab355f443558d4b39bfc720e9da77e38 100644 (file)
@@ -372,6 +372,12 @@ extern ULong MC_(event_ctr)[MCPE_LAST];
 #define V_BITS64_DEFINED      0ULL
 #define V_BITS64_UNDEFINED    0xFFFFFFFFFFFFFFFFULL
 
+/* Set to 1 to enable handwritten assembly helpers on targets for
+   which it is supported. */
+#define ENABLE_ASSEMBLY_HELPERS 1
+
+/* Comment out the line below to disable the fast case LOADV */
+#define PERF_FAST_LOADV         1
 
 /*------------------------------------------------------------*/
 /*--- Leak checking                                        ---*/
index 834f5976e23e12520bc595b46e67ec0da80820b6..6908942cce97cb6f47c0a9428672da906135c720 100644 (file)
 #include "mc_include.h"
 #include "memcheck.h"   /* for client requests */
 
-
-/* Set to 1 to enable handwritten assembly helpers on targets for
-   which it is supported. */
-#define ENABLE_ASSEMBLY_HELPERS 1
-
 /* Set to 1 to do a little more sanity checking */
 #define VG_DEBUG_MEMORY 0
 
@@ -74,7 +69,7 @@ static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */
  
 // Comment these out to disable the fast cases (don't just set them to zero).
 
-#define PERF_FAST_LOADV    1
+/* PERF_FAST_LOADV is in mc_include.h */
 #define PERF_FAST_STOREV   1
 
 #define PERF_FAST_SARP     1
@@ -374,7 +369,17 @@ static void update_SM_counts(SecMap* oldSM, SecMap* newSM)
    space, addresses 0 .. (N_PRIMARY_MAP << 16)-1.  The rest of it is
    handled using the auxiliary primary map.  
 */
-static SecMap* primary_map[N_PRIMARY_MAP];
+#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
+    && (defined(VGP_arm_linux) \
+        || defined(VGP_x86_linux) || defined(VGP_x86_solaris))
+/* mc_main_asm.c needs visibility on a few things declared in this file.
+   MC_MAIN_STATIC lets them be defined static where that is possible, i.e. on
+   platforms that are not using hand-coded asm statements. */
+#define MC_MAIN_STATIC
+#else
+#define MC_MAIN_STATIC static
+#endif
+MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP];
 
 
 /* An entry in the auxiliary primary map.  base must be a 64k-aligned
@@ -1364,8 +1369,13 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res,
    MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False );
 }
 
+MC_MAIN_STATIC
+__attribute__((noinline))
+__attribute__((used))
+VG_REGPARM(3)
+ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian );
 
-static
+MC_MAIN_STATIC
 __attribute__((noinline))
 __attribute__((used))
 VG_REGPARM(3) /* make sure we're using a fixed calling convention, since
@@ -4861,78 +4871,11 @@ VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a )
 // Non-generic assembly for arm32-linux
 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
     && defined(VGP_arm_linux)
-__asm__( /* Derived from the 32 bit assembly helper */
-".text                                  \n"
-".align 2                               \n"
-".global vgMemCheck_helperc_LOADV64le   \n"
-".type   vgMemCheck_helperc_LOADV64le, %function \n"
-"vgMemCheck_helperc_LOADV64le:          \n"
-"      tst    r0, #7                    \n"
-"      movw   r3, #:lower16:primary_map \n"
-"      bne    .LLV64LEc4                \n" // if misaligned
-"      lsr    r2, r0, #16               \n"
-"      movt   r3, #:upper16:primary_map \n"
-"      ldr    r2, [r3, r2, lsl #2]      \n"
-"      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
-"      movw   r3, #0xAAAA               \n"
-"      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
-"      ldrh   r1, [r2, r1]              \n"
-"      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
-"      bne    .LLV64LEc0                \n" // if !all_defined
-"      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
-"      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
-"      bx     lr                        \n"
-".LLV64LEc0:                            \n"
-"      movw   r3, #0x5555               \n"
-"      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
-"      bne    .LLV64LEc4                \n" // if !all_undefined
-"      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
-"      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
-"      bx     lr                        \n"
-".LLV64LEc4:                            \n"
-"      push   {r4, lr}                  \n"
-"      mov    r2, #0                    \n"
-"      mov    r1, #64                   \n"
-"      bl     mc_LOADVn_slow            \n"
-"      pop    {r4, pc}                  \n"
-".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
-".previous\n"
-);
+/* See mc_main_asm.c */
 
 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
-__asm__(
-".text\n"
-".align 16\n"
-".global vgMemCheck_helperc_LOADV64le\n"
-".type   vgMemCheck_helperc_LOADV64le, @function\n"
-"vgMemCheck_helperc_LOADV64le:\n"
-"      test   $0x7,  %eax\n"
-"      jne    .LLV64LE2\n"          /* jump if not aligned */
-"      mov    %eax,  %ecx\n"
-"      movzwl %ax,   %edx\n"
-"      shr    $0x10, %ecx\n"
-"      mov    primary_map(,%ecx,4), %ecx\n"
-"      shr    $0x3,  %edx\n"
-"      movzwl (%ecx,%edx,2), %edx\n"
-"      cmp    $0xaaaa, %edx\n"
-"      jne    .LLV64LE1\n"          /* jump if not all defined */
-"      xor    %eax, %eax\n"         /* return 0 in edx:eax */
-"      xor    %edx, %edx\n"
-"      ret\n"
-".LLV64LE1:\n"
-"      cmp    $0x5555, %edx\n"
-"      jne    .LLV64LE2\n"         /* jump if not all undefined */
-"      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
-"      or     $0xffffffff, %edx\n"
-"      ret\n"
-".LLV64LE2:\n"
-"      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
-"      mov    $64,   %edx\n"
-"      jmp    mc_LOADVn_slow\n"
-".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
-".previous\n"
-);
+/* See mc_main_asm.c */
 
 #else
 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
@@ -5064,71 +5007,11 @@ VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a )
 // Non-generic assembly for arm32-linux
 #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
     && defined(VGP_arm_linux)
-__asm__( /* Derived from NCode template */
-".text                                  \n"
-".align 2                               \n"
-".global vgMemCheck_helperc_LOADV32le   \n"
-".type   vgMemCheck_helperc_LOADV32le, %function \n"
-"vgMemCheck_helperc_LOADV32le:          \n"
-"      tst    r0, #3                    \n" // 1
-"      movw   r3, #:lower16:primary_map \n" // 1
-"      bne    .LLV32LEc4                \n" // 2  if misaligned
-"      lsr    r2, r0, #16               \n" // 3
-"      movt   r3, #:upper16:primary_map \n" // 3
-"      ldr    r2, [r3, r2, lsl #2]      \n" // 4
-"      uxth   r1, r0                    \n" // 4
-"      ldrb   r1, [r2, r1, lsr #2]      \n" // 5
-"      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
-"      bne    .LLV32LEc0                \n" // 7  if !all_defined
-"      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
-"      bx     lr                        \n" // 9
-".LLV32LEc0:                            \n"
-"      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
-"      bne    .LLV32LEc4                \n" // if !all_undefined
-"      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
-"      bx     lr                        \n"
-".LLV32LEc4:                            \n"
-"      push   {r4, lr}                  \n"
-"      mov    r2, #0                    \n"
-"      mov    r1, #32                   \n"
-"      bl     mc_LOADVn_slow            \n"
-"      pop    {r4, pc}                  \n"
-".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
-".previous\n"
-);
+/* See mc_main_asm.c */
 
 #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
       && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
-__asm__(
-".text\n"
-".align 16\n"
-".global vgMemCheck_helperc_LOADV32le\n"
-".type   vgMemCheck_helperc_LOADV32le, @function\n"
-"vgMemCheck_helperc_LOADV32le:\n"
-"      test   $0x3,  %eax\n"
-"      jnz    .LLV32LE2\n"         /* jump if misaligned */
-"      mov    %eax,  %edx\n"
-"      shr    $16,   %edx\n"
-"      mov    primary_map(,%edx,4), %ecx\n"
-"      movzwl %ax,   %edx\n"
-"      shr    $2,    %edx\n"
-"      movzbl (%ecx,%edx,1), %edx\n"
-"      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
-"      jne    .LLV32LE1\n"         /* jump if not completely defined */
-"      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
-"      ret\n"
-".LLV32LE1:\n"
-"      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
-"      jne    .LLV32LE2\n"         /* jump if not completely undefined */
-"      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
-"      ret\n"
-".LLV32LE2:\n"
-"      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
-"      mov    $32,   %edx\n"
-"      jmp    mc_LOADVn_slow\n"
-".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
-".previous\n"
-);
+/* See mc_main_asm.c */
 
 #else
 // Generic for all platforms except {arm32,x86}-linux and x86-solaris
diff --git a/memcheck/mc_main_asm.c b/memcheck/mc_main_asm.c
new file mode 100644 (file)
index 0000000..a853ccd
--- /dev/null
@@ -0,0 +1,204 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
+
+/*--------------------------------------------------------------------*/
+/*--- MemCheck: some non-generic asm implementations of mc_main.c     */
+/*--- functions                                                    ---*/
+/*---                                                mc_main_asm.c ---*/
+/*--------------------------------------------------------------------*/
+
+/*
+   This file is part of MemCheck, a heavyweight Valgrind tool for
+   detecting memory errors.
+
+   Copyright (C) 2000-2018 Julian Seward 
+      jseward@acm.org
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Having these in mc_main.c gives undefined references at link time,
+   when compiling with lto. Having them in a separate file solves this.
+   Also, for some toolchains, we might need to disable lto. */
+
+// A bunch of includes needed only for mc_include.h
+#include "pub_tool_basics.h"
+#include "pub_tool_poolalloc.h"
+#include "pub_tool_hashtable.h"
+#include "pub_tool_tooliface.h"
+
+#include "mc_include.h"
+
+// Non-generic assembly for arm32-linux
+#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
+    && defined(VGP_arm_linux)
+__asm__( /* Derived from the 32 bit assembly helper. In: r0 = address. Out: V bits in r0 and r1. */
+".text                                  \n"
+".align 2                               \n"
+".global vgMemCheck_helperc_LOADV64le   \n"
+".type   vgMemCheck_helperc_LOADV64le, %function \n"
+"vgMemCheck_helperc_LOADV64le:          \n"
+"      tst    r0, #7                    \n" // any of the low 3 address bits set?
+"      movw   r3, #:lower16:primary_map \n" // r3 = low half of &primary_map
+"      bne    .LLV64LEc4                \n" // if misaligned
+"      lsr    r2, r0, #16               \n" // r2 = address >> 16
+"      movt   r3, #:upper16:primary_map \n" // r3 = &primary_map
+"      ldr    r2, [r3, r2, lsl #2]      \n" // r2 = word at primary_map + 4 * (address >> 16)
+"      uxth   r1, r0                    \n" // r1 is 0-(16)-0 X-(13)-X 000
+"      movw   r3, #0xAAAA               \n" // r3 = constant for the cmp below
+"      lsr    r1, r1, #2                \n" // r1 is 0-(16)-0 00 X-(13)-X 0
+"      ldrh   r1, [r2, r1]              \n" // r1 = halfword at r2 + r1
+"      cmp    r1, r3                    \n" // 0xAAAA == VA_BITS16_DEFINED
+"      bne    .LLV64LEc0                \n" // if !all_defined
+"      mov    r1, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
+"      mov    r0, #0x0                  \n" // 0x0 == V_BITS32_DEFINED
+"      bx     lr                        \n" // return
+".LLV64LEc0:                            \n" // reached when the halfword is not 0xAAAA
+"      movw   r3, #0x5555               \n" // r3 = constant for the cmp below
+"      cmp    r1, r3                    \n" // 0x5555 == VA_BITS16_UNDEFINED
+"      bne    .LLV64LEc4                \n" // if !all_undefined
+"      mov    r1, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
+"      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
+"      bx     lr                        \n" // return
+".LLV64LEc4:                            \n" // slow path: misaligned, or neither pattern matched
+"      push   {r4, lr}                  \n" // save lr across the call; r4 presumably only keeps sp 8-byte aligned - TODO confirm
+"      mov    r2, #0                    \n" // third argument = 0
+"      mov    r1, #64                   \n" // second argument = 64; r0 still holds the address
+"      bl     mc_LOADVn_slow            \n" // call the slow helper
+"      pop    {r4, pc}                  \n" // restore r4 and return; r0/r1 are left as set by the callee
+".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n"
+".previous\n"
+);
+
+#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
+      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
+__asm__( /* x86 variant. In: %eax = address. Out: V bits in %edx:%eax. */
+".text\n"
+".align 16\n"
+".global vgMemCheck_helperc_LOADV64le\n"
+".type   vgMemCheck_helperc_LOADV64le, @function\n"
+"vgMemCheck_helperc_LOADV64le:\n"
+"      test   $0x7,  %eax\n"        /* any of the low 3 address bits set? */
+"      jne    .LLV64LE2\n"          /* jump if not aligned */
+"      mov    %eax,  %ecx\n"        /* copy the address; eax is kept for the slow path */
+"      movzwl %ax,   %edx\n"        /* edx = address & 0xffff */
+"      shr    $0x10, %ecx\n"        /* ecx = address >> 16 */
+"      mov    primary_map(,%ecx,4), %ecx\n" /* ecx = word at primary_map + 4 * ecx */
+"      shr    $0x3,  %edx\n"        /* edx = (address & 0xffff) >> 3 */
+"      movzwl (%ecx,%edx,2), %edx\n" /* edx = 16-bit value at ecx + 2 * edx */
+"      cmp    $0xaaaa, %edx\n"      /* 0xaaaa == VA_BITS16_DEFINED */
+"      jne    .LLV64LE1\n"          /* jump if not all defined */
+"      xor    %eax, %eax\n"         /* return 0 in edx:eax */
+"      xor    %edx, %edx\n"
+"      ret\n"
+".LLV64LE1:\n"
+"      cmp    $0x5555, %edx\n"     /* 0x5555 == VA_BITS16_UNDEFINED */
+"      jne    .LLV64LE2\n"         /* jump if not all undefined */
+"      or     $0xffffffff, %eax\n" /* else return all bits set in edx:eax */
+"      or     $0xffffffff, %edx\n"
+"      ret\n"
+".LLV64LE2:\n"                     /* slow path: misaligned, or neither pattern matched */
+"      xor    %ecx,  %ecx\n"  /* tail call to mc_LOADVn_slow(a, 64, 0) */
+"      mov    $64,   %edx\n"
+"      jmp    mc_LOADVn_slow\n"    /* jmp, not call: the callee returns directly to our caller */
+".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n"
+".previous\n"
+);
+
+#else
+// Generic for all platforms except {arm32,x86}-linux and x86-solaris
+// is in mc_main.c
+#endif
+
+
+// Non-generic assembly for arm32-linux
+#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
+    && defined(VGP_arm_linux)
+__asm__( /* Derived from NCode template. In: r0 = address. Out: V bits in r0. */
+".text                                  \n"
+".align 2                               \n"
+".global vgMemCheck_helperc_LOADV32le   \n"
+".type   vgMemCheck_helperc_LOADV32le, %function \n"
+"vgMemCheck_helperc_LOADV32le:          \n"
+"      tst    r0, #3                    \n" // 1  any of the low 2 address bits set?
+"      movw   r3, #:lower16:primary_map \n" // 1  r3 = low half of &primary_map
+"      bne    .LLV32LEc4                \n" // 2  if misaligned
+"      lsr    r2, r0, #16               \n" // 3  r2 = address >> 16
+"      movt   r3, #:upper16:primary_map \n" // 3  r3 = &primary_map
+"      ldr    r2, [r3, r2, lsl #2]      \n" // 4  r2 = word at primary_map + 4 * (address >> 16)
+"      uxth   r1, r0                    \n" // 4  r1 = address & 0xffff
+"      ldrb   r1, [r2, r1, lsr #2]      \n" // 5  r1 = byte at r2 + (r1 >> 2)
+"      cmp    r1, #0xAA                 \n" // 6  0xAA == VA_BITS8_DEFINED
+"      bne    .LLV32LEc0                \n" // 7  if !all_defined
+"      mov    r0, #0x0                  \n" // 8  0x0 == V_BITS32_DEFINED
+"      bx     lr                        \n" // 9  return
+".LLV32LEc0:                            \n" // reached when the byte is not 0xAA
+"      cmp    r1, #0x55                 \n" // 0x55 == VA_BITS8_UNDEFINED
+"      bne    .LLV32LEc4                \n" // if !all_undefined
+"      mov    r0, #0xFFFFFFFF           \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED
+"      bx     lr                        \n" // return
+".LLV32LEc4:                            \n" // slow path: misaligned, or neither pattern matched
+"      push   {r4, lr}                  \n" // save lr across the call; r4 presumably only keeps sp 8-byte aligned - TODO confirm
+"      mov    r2, #0                    \n" // third argument = 0
+"      mov    r1, #32                   \n" // second argument = 32; r0 still holds the address
+"      bl     mc_LOADVn_slow            \n" // call the slow helper
+"      pop    {r4, pc}                  \n" // restore r4 and return; r0 is left as set by the callee
+".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n"
+".previous\n"
+);
+
+#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \
+      && (defined(VGP_x86_linux) || defined(VGP_x86_solaris))
+__asm__( /* x86 variant. In: %eax = address. Out: V bits in %eax. */
+".text\n"
+".align 16\n"
+".global vgMemCheck_helperc_LOADV32le\n"
+".type   vgMemCheck_helperc_LOADV32le, @function\n"
+"vgMemCheck_helperc_LOADV32le:\n"
+"      test   $0x3,  %eax\n"       /* any of the low 2 address bits set? */
+"      jnz    .LLV32LE2\n"         /* jump if misaligned */
+"      mov    %eax,  %edx\n"       /* copy the address; eax is kept for the slow path */
+"      shr    $16,   %edx\n"       /* edx = address >> 16 */
+"      mov    primary_map(,%edx,4), %ecx\n" /* ecx = word at primary_map + 4 * edx */
+"      movzwl %ax,   %edx\n"       /* edx = address & 0xffff */
+"      shr    $2,    %edx\n"       /* edx = (address & 0xffff) >> 2 */
+"      movzbl (%ecx,%edx,1), %edx\n" /* edx = byte at ecx + edx */
+"      cmp    $0xaa, %edx\n"       /* compare to VA_BITS8_DEFINED */
+"      jne    .LLV32LE1\n"         /* jump if not completely defined */
+"      xor    %eax,  %eax\n"       /* else return V_BITS32_DEFINED */
+"      ret\n"
+".LLV32LE1:\n"
+"      cmp    $0x55, %edx\n"       /* compare to VA_BITS8_UNDEFINED */
+"      jne    .LLV32LE2\n"         /* jump if not completely undefined */
+"      or     $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */
+"      ret\n"
+".LLV32LE2:\n"                     /* slow path: misaligned, or neither pattern matched */
+"      xor    %ecx,  %ecx\n"       /* tail call mc_LOADVn_slow(a, 32, 0) */
+"      mov    $32,   %edx\n"
+"      jmp    mc_LOADVn_slow\n"    /* jmp, not call: the callee returns directly to our caller */
+".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n"
+".previous\n"
+);
+
+#else
+// Generic for all platforms except {arm32,x86}-linux and x86-solaris
+// is in mc_main.c
+#endif
+
+/*--------------------------------------------------------------------*/
+/*--- end                                                          ---*/
+/*--------------------------------------------------------------------*/
index 753687eecbaa1da6c41e516b1ea128ccce9e81d8..5d8b72ee98dc51cd56e8a6e6f50292ff7cfeac26 100644 (file)
@@ -17,7 +17,7 @@ none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES      = \
        $(NONE_SOURCES_COMMON)
 none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
-none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = \
+none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@)
 none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@)
@@ -37,7 +37,7 @@ none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES      = \
        $(NONE_SOURCES_COMMON)
 none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS     = \
        $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
-none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = \
+none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS       = $(LTO_CFLAGS) \
        $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@)
 none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \
        $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)