From: Philippe Waroquiers Date: Sun, 11 Mar 2018 21:26:08 +0000 (+0100) Subject: Fix 338252 - building valgrind with -flto (link time optimisation) fails X-Git-Tag: VALGRIND_3_14_0~136 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=ab773096df7aaaf46e8883af5ed4690f4d4499af;p=thirdparty%2Fvalgrind.git Fix 338252 - building valgrind with -flto (link time optimisation) fails * Addition of a new configure option --enable-lto=yes or --enable-lto=no Default value is --enable-lto=no, as the build is significantly slower, so is not appropriate for valgrind development : this should be used only on buildbots and/or by packagers. * Some files containing asm functions have to be compiled without lto: coregrind/m_libcsetjmp.c coregrind/m_main.c If these are compiled with lto, that gives undefined symbols at link time. The files to compile without lto are coregrind/m_libcsetjmp.c coregrind/m_main.c To compile these files with other options, a noinst target lib is defined. The objects of this library are then added to the libcoregrind. * memcheck/mc_main.c : move the handwritten asm helpers to mc_main_asm.c. This avoids undefined symbols on some toolchains. Due to this, the preprocessor symbols that activate the fast or asm memcheck helpers are moved to mc_include.h Platforms with handwritten helpers will also have the memcheck primary map defined non static. * In VEX, auxprogs/genoffsets.c also has to be compiled without lto, as the asm produced by the compiler is post-processed to produce pub/libvex_guest_offsets.h. lto not producing asm means the generation fails if we used -flto to compile this file. * all the various Makefile*am are modified to use LTO_CFLAGS for (most) targets. LTO_CFLAGS is empty when --enable-lto=no, otherwise is set to the flags needed for gcc. If --enable-lto=no, LTO_AR and LTO_RANLIB are the standard AR and RANLIB, otherwise they are the lto capable versions (gcc-ar and gcc-ranlib). 
* This has been tested on: debian 9.4/gcc 6.3.0/amd64+x86 rhel 7.4/gcc 6.4.0/amd64 ubuntu 17.10/gcc 7.2.0/amd64+x86 fedora26/gcc 7.3.1/s390x No regressions on the above. --- diff --git a/Makefile.vex.am b/Makefile.vex.am index 4ad5ffa67f..64f837352b 100644 --- a/Makefile.vex.am +++ b/Makefile.vex.am @@ -68,6 +68,10 @@ else CFLAGS_FOR_GENOFFSETS = $(CFLAGS) endif +# for VEX building, use the LTO versions, in case they differ from non lto versions +AR = ${LTO_AR} +RANLIB = ${LTO_RANLIB} + # This is very uggerly. Need to sed out both "xyzzyN" and # "xyzzy$N" since gcc on different targets emits the constants # differently -- with a leading $ on x86/amd64 but none on ppc32/64. @@ -86,7 +90,7 @@ pub/libvex_guest_offsets.h: auxprogs/genoffsets.c \ rm -f auxprogs/genoffsets.s $(mkdir_p) auxprogs pub $(CC) $(CFLAGS_FOR_GENOFFSETS) \ - $(LIBVEX_CFLAGS) \ + $(LIBVEX_CFLAGS_NO_LTO) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) \ -O -S -o auxprogs/genoffsets.s \ $(srcdir)/auxprogs/genoffsets.c @@ -162,10 +166,13 @@ LIBVEX_SOURCES_COMMON = \ LIBVEXMULTIARCH_SOURCES = priv/multiarch_main_main.c -LIBVEX_CFLAGS = \ +LIBVEX_CFLAGS_NO_LTO = \ -Wbad-function-cast \ -fstrict-aliasing +LIBVEX_CFLAGS = ${LTO_CFLAGS} \ + ${LIBVEX_CFLAGS_NO_LTO} + libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_SOURCES = $(LIBVEX_SOURCES_COMMON) libvex_@VGCONF_ARCH_PRI@_@VGCONF_OS@_a_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -Ipriv diff --git a/NEWS b/NEWS index 3d04b75943..f762530a92 100644 --- a/NEWS +++ b/NEWS @@ -35,6 +35,11 @@ support for X86/macOS 10.13, AMD64/macOS 10.13. * ==================== OTHER CHANGES ==================== +* A new configure option --enable-lto=yes allows to build Valgrind + with link time optimisation. If the toolchain supports it, + this produces a smaller/faster Valgrind (up to 10%). + Note that if you are doing some Valgrind development, --enable-lto=yes + significantly slows down the build. 
* ==================== FIXED BUGS ==================== @@ -52,6 +57,7 @@ where XXXXXX is the bug number as listed below. 79362 Debug info is lost for .so files when they are dlclose'd 208052 strlcpy error when n = 0 255603 exp-sgcheck Assertion '!already_present' failed +338252 - building valgrind with -flto (link time optimisation) fails 376257 helgrind history full speed up using a cached stack 379373 Fix syscall param msg->desc.port.name points to uninitialised byte(s) on macOS 10.12 @@ -91,7 +97,7 @@ where XXXXXX is the bug number as listed below. 387712 s390x cgijnl reports Conditional jump depends on uninitialised value 387773 .gnu_debugaltlink paths resolve relative to .debug file, not symlink 388862 Add replacements for wmemchr and wcsnlen on Linux -389373 exp-sgcheck the 'impossible' happened as st_LoadG is not instrumented +389373 exp-sgcheck the 'impossible' happened as Ist_LoadG is not instrumented 389065 valgrind meets gcc flag -Wlogical-op 390723 make xtree dump files world wide readable, similar to log files diff --git a/README_PACKAGERS b/README_PACKAGERS index 28ee8ba129..e86c15309b 100644 --- a/README_PACKAGERS +++ b/README_PACKAGERS @@ -5,6 +5,9 @@ building binary distributions of Valgrind. Thanks for taking the time and effort to make a binary distribution of Valgrind. The following notes may save you some trouble. +-- If your toolchain (compiler, linker) support lto, using the configure + option --enable-lto=yes will produce a smaller/faster valgrind + (up to 10%). -- Do not ship your Linux distro with a completely stripped /lib/ld.so. At least leave the debugging symbol names on -- line @@ -28,7 +31,7 @@ Valgrind. The following notes may save you some trouble. not any debuginfo or extra symbols for any other libraries. --- (Unfortunate but true) When you configure to build with the +-- (Unfortunate but true) When you configure to build with the --prefix=/foo/bar/xyzzy option, the prefix /foo/bar/xyzzy gets baked into valgrind. 
The consequence is that you _must_ install valgrind at the location specified in the prefix. If you don't, diff --git a/cachegrind/Makefile.am b/cachegrind/Makefile.am index a587d24124..f8447a17ce 100644 --- a/cachegrind/Makefile.am +++ b/cachegrind/Makefile.am @@ -53,7 +53,7 @@ cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(CACHEGRIND_SOURCES_COMMON) cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) cachegrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -73,7 +73,7 @@ cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(CACHEGRIND_SOURCES_COMMON) cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS)\ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) cachegrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/callgrind/Makefile.am b/callgrind/Makefile.am index 56e8a04c23..7cecb44b3e 100644 --- a/callgrind/Makefile.am +++ b/callgrind/Makefile.am @@ -53,7 +53,7 @@ callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(CALLGRIND_SOURCES_COMMON) callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(CALLGRIND_CFLAGS_COMMON) callgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -73,7 +73,7 @@ callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(CALLGRIND_SOURCES_COMMON) callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ 
$(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(CALLGRIND_CFLAGS_COMMON) callgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/configure.ac b/configure.ac index 3988d62c63..03704981de 100644 --- a/configure.ac +++ b/configure.ac @@ -45,6 +45,12 @@ AC_PROG_CXX # AC_SUBST([OBJCFLAGS]) # ]) AC_PROG_RANLIB +# Set LTO_RANLIB variable to an lto enabled ranlib +if test "x$LTO_RANLIB" = "x"; then + AC_PATH_PROGS([LTO_RANLIB], [gcc-ranlib]) +fi +AC_ARG_VAR([LTO_RANLIB],[Library indexer command for link time optimisation]) + # provide a very basic definition for AC_PROG_SED if it's not provided by # autoconf (as e.g. in autoconf 2.59). m4_ifndef([AC_PROG_SED], @@ -60,6 +66,13 @@ if test "x$AR" = "x"; then fi AC_ARG_VAR([AR],[Archiver command]) +# same for LTO_AR variable for lto enabled archiver +if test "x$LTO_AR" = "x"; then + AC_PATH_PROGS([LTO_AR], [gcc-ar]) +fi +AC_ARG_VAR([LTO_AR],[Archiver command for link time optimisation]) + + # Check for the compiler support if test "${GCC}" != "yes" ; then AC_MSG_ERROR([Valgrind relies on GCC to be compiled]) @@ -1945,6 +1958,55 @@ AM_CONDITIONAL(HAS_MLONG_DOUBLE_128, test x$ac_compiler_supports_mlong_double_12 FLAG_MLONG_DOUBLE_128="-mlong-double-128" AC_SUBST(FLAG_MLONG_DOUBLE_128) +# does this toolchain support lto ? +# Not checked for if --enable-lto=no was given, or if LTO_AR or LTO_RANLIG +# are not defined +# If not enable-lto=* arg is provided, default to no, as lto builds are +# a lot slower, and so not appropriate for Valgrind developments. +# --enable-lto=yes should be used by distro packagers. 
+AC_CACHE_CHECK([for using the link time optimisation], vg_cv_lto, + [AC_ARG_ENABLE(lto, + [ --enable-lto enables building with link time optimisation], + [vg_cv_lto=$enableval], + [vg_cv_lto=no])]) + +if test "x${vg_cv_lto}" != "xno" -a "x${LTO_AR}" != "x" -a "x${LTO_RANLIB}" != "x"; then +AC_MSG_CHECKING([if toolchain accepts lto]) +safe_CFLAGS=$CFLAGS +TEST_LTO_CFLAGS="-flto -flto-partition=one -fuse-linker-plugin" +# Note : using 'one' partition is giving a slightly smaller/faster memcheck +# and ld/lto-trans1 still needs a reasonable memory (about 0.5GB) when linking. +CFLAGS="$TEST_LTO_CFLAGS -Werror" + +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[ + extern void somefun(void); + somefun(); + return 0; +]])], [ +LTO_CFLAGS=$TEST_LTO_CFLAGS +AC_MSG_RESULT([yes]) +], [ +LTO_CFLAGS="" +AC_MSG_RESULT([no]) +]) +CFLAGS=$safe_CFLAGS +fi + +AC_SUBST(LTO_CFLAGS) + +# if we could not compile with lto args, or lto was disabled, +# then set LTO_AR/LTO_RANLIB to the non lto values +# define in config.h ENABLE_LTO (not needed by the code currently, but +# this guarantees we recompile everything if we re-configure and rebuild +# in a build dir previously build with another value of --enable-lto +if test "x${LTO_CFLAGS}" = "x"; then + LTO_AR=${AR} + LTO_RANLIB=${RANLIB} + vg_cv_lto=no +else + vg_cv_lto=yes + AC_DEFINE([ENABLE_LTO], 1, [configured to build with lto link time optimisation]) +fi # Convenience function to check whether GCC supports a particular # warning option. Takes two arguments, @@ -4585,6 +4647,7 @@ cat< */ +/* This file must be compiled without link time optimisation, as otherwise + the asm functions below become undefined references at link time for + unclear reasons. */ #include "pub_core_basics.h" #include "pub_core_libcsetjmp.h" /* self */ - /* See include/pub_tool_libcsetjmp.h for background and rationale. 
*/ /* The alternative implementations are for ppc{32,64}-linux and @@ -689,7 +691,6 @@ __asm__( ".previous \n\t" ); #endif /* VGP_mips64_linux */ - /*--------------------------------------------------------------------*/ /*--- end ---*/ /*--------------------------------------------------------------------*/ diff --git a/drd/Makefile.am b/drd/Makefile.am index 9ba4c58e00..93e2507c42 100644 --- a/drd/Makefile.am +++ b/drd/Makefile.am @@ -73,7 +73,7 @@ drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(DRD_SOURCES_COMMON) drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) $(DRD_CFLAGS) drd_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -93,7 +93,7 @@ drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(DRD_SOURCES_COMMON) drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) $(DRD_CFLAGS) drd_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/exp-bbv/Makefile.am b/exp-bbv/Makefile.am index 78d9e53287..8cb5c3025e 100644 --- a/exp-bbv/Makefile.am +++ b/exp-bbv/Makefile.am @@ -17,7 +17,7 @@ exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(BBV_SOURCES_COMMON) exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) exp_bbv_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -37,7 +37,7 @@ exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(BBV_SOURCES_COMMON) 
exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) exp_bbv_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/exp-dhat/Makefile.am b/exp-dhat/Makefile.am index 647b72bb28..b74529858d 100644 --- a/exp-dhat/Makefile.am +++ b/exp-dhat/Makefile.am @@ -25,7 +25,7 @@ exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(EXP_DHAT_SOURCES_COMMON) exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) exp_dhat_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -45,7 +45,7 @@ exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(EXP_DHAT_SOURCES_COMMON) exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) exp_dhat_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/exp-sgcheck/Makefile.am b/exp-sgcheck/Makefile.am index 211a5ffadd..8927ff630b 100644 --- a/exp-sgcheck/Makefile.am +++ b/exp-sgcheck/Makefile.am @@ -30,7 +30,7 @@ exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(EXP_PTRCHECK_SOURCES_COMMON) exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) exp_sgcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ 
$(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -50,7 +50,7 @@ exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(EXP_PTRCHECK_SOURCES_COMMON) exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) exp_sgcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/helgrind/Makefile.am b/helgrind/Makefile.am index b6f1b894bb..6865ef2c3b 100644 --- a/helgrind/Makefile.am +++ b/helgrind/Makefile.am @@ -41,7 +41,7 @@ helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(HELGRIND_SOURCES_COMMON) helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -O2 helgrind_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -61,7 +61,7 @@ helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(HELGRIND_SOURCES_COMMON) helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -O2 helgrind_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/lackey/Makefile.am b/lackey/Makefile.am index 39ada21fdd..a63023278e 100644 --- a/lackey/Makefile.am +++ b/lackey/Makefile.am @@ -17,7 +17,7 @@ lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(LACKEY_SOURCES_COMMON) lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = 
$(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) lackey_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -37,7 +37,7 @@ lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(LACKEY_SOURCES_COMMON) lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) lackey_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/massif/Makefile.am b/massif/Makefile.am index 29334035d4..f631831b2b 100644 --- a/massif/Makefile.am +++ b/massif/Makefile.am @@ -25,7 +25,7 @@ massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(MASSIF_SOURCES_COMMON) massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) massif_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -45,7 +45,7 @@ massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(MASSIF_SOURCES_COMMON) massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) massif_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/memcheck/Makefile.am b/memcheck/Makefile.am index 721cb21eb0..4a9512d10c 100644 --- a/memcheck/Makefile.am +++ b/memcheck/Makefile.am @@ -24,7 +24,7 @@ endif MEMCHECK_SOURCES_COMMON = \ mc_leakcheck.c \ mc_malloc_wrappers.c \ - mc_main.c \ + mc_main.c mc_main_asm.c \ mc_translate.c \ mc_machine.c \ mc_errors.c @@ -33,7 +33,7 @@ 
memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(MEMCHECK_SOURCES_COMMON) memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -O2 memcheck_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -53,7 +53,7 @@ memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(MEMCHECK_SOURCES_COMMON) memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -O2 memcheck_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@) diff --git a/memcheck/mc_include.h b/memcheck/mc_include.h index 765634b912..7b4c555cab 100644 --- a/memcheck/mc_include.h +++ b/memcheck/mc_include.h @@ -372,6 +372,12 @@ extern ULong MC_(event_ctr)[MCPE_LAST]; #define V_BITS64_DEFINED 0ULL #define V_BITS64_UNDEFINED 0xFFFFFFFFFFFFFFFFULL +/* Set to 1 to enable handwritten assembly helpers on targets for + which it is supported. */ +#define ENABLE_ASSEMBLY_HELPERS 1 + +/* Comment the below to disable the fast case LOADV */ +#define PERF_FAST_LOADV 1 /*------------------------------------------------------------*/ /*--- Leak checking ---*/ diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c index 834f5976e2..6908942cce 100644 --- a/memcheck/mc_main.c +++ b/memcheck/mc_main.c @@ -54,11 +54,6 @@ #include "mc_include.h" #include "memcheck.h" /* for client requests */ - -/* Set to 1 to enable handwritten assembly helpers on targets for - which it is supported. 
*/ -#define ENABLE_ASSEMBLY_HELPERS 1 - /* Set to 1 to do a little more sanity checking */ #define VG_DEBUG_MEMORY 0 @@ -74,7 +69,7 @@ static void ocache_sarp_Clear_Origins ( Addr, UWord ); /* fwds */ // Comment these out to disable the fast cases (don't just set them to zero). -#define PERF_FAST_LOADV 1 +/* PERF_FAST_LOADV is in mc_include.h */ #define PERF_FAST_STOREV 1 #define PERF_FAST_SARP 1 @@ -374,7 +369,17 @@ static void update_SM_counts(SecMap* oldSM, SecMap* newSM) space, addresses 0 .. (N_PRIMARY_MAP << 16)-1. The rest of it is handled using the auxiliary primary map. */ -static SecMap* primary_map[N_PRIMARY_MAP]; +#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ + && (defined(VGP_arm_linux) \ + || defined(VGP_x86_linux) || defined(VGP_x86_solaris)) +/* mc_main_asm.c needs visibility on a few things declared in this file. + MC_MAIN_STATIC allows to define them static if ok, i.e. on + platforms that are not using hand-coded asm statements. */ +#define MC_MAIN_STATIC +#else +#define MC_MAIN_STATIC static +#endif +MC_MAIN_STATIC SecMap* primary_map[N_PRIMARY_MAP]; /* An entry in the auxiliary primary map. 
base must be a 64k-aligned @@ -1364,8 +1369,13 @@ void mc_LOADV_128_or_256_slow ( /*OUT*/ULong* res, MC_(record_address_error)( VG_(get_running_tid)(), a, szB, False ); } +MC_MAIN_STATIC +__attribute__((noinline)) +__attribute__((used)) +VG_REGPARM(3) +ULong mc_LOADVn_slow ( Addr a, SizeT nBits, Bool bigendian ); -static +MC_MAIN_STATIC __attribute__((noinline)) __attribute__((used)) VG_REGPARM(3) /* make sure we're using a fixed calling convention, since @@ -4861,78 +4871,11 @@ VG_REGPARM(1) ULong MC_(helperc_LOADV64be) ( Addr a ) // Non-generic assembly for arm32-linux #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ && defined(VGP_arm_linux) -__asm__( /* Derived from the 32 bit assembly helper */ -".text \n" -".align 2 \n" -".global vgMemCheck_helperc_LOADV64le \n" -".type vgMemCheck_helperc_LOADV64le, %function \n" -"vgMemCheck_helperc_LOADV64le: \n" -" tst r0, #7 \n" -" movw r3, #:lower16:primary_map \n" -" bne .LLV64LEc4 \n" // if misaligned -" lsr r2, r0, #16 \n" -" movt r3, #:upper16:primary_map \n" -" ldr r2, [r3, r2, lsl #2] \n" -" uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000 -" movw r3, #0xAAAA \n" -" lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0 -" ldrh r1, [r2, r1] \n" -" cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED -" bne .LLV64LEc0 \n" // if !all_defined -" mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED -" mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED -" bx lr \n" -".LLV64LEc0: \n" -" movw r3, #0x5555 \n" -" cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED -" bne .LLV64LEc4 \n" // if !all_undefined -" mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED -" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED -" bx lr \n" -".LLV64LEc4: \n" -" push {r4, lr} \n" -" mov r2, #0 \n" -" mov r1, #64 \n" -" bl mc_LOADVn_slow \n" -" pop {r4, pc} \n" -".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n" -".previous\n" -); +/* See mc_main_asm.c */ #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ && 
(defined(VGP_x86_linux) || defined(VGP_x86_solaris)) -__asm__( -".text\n" -".align 16\n" -".global vgMemCheck_helperc_LOADV64le\n" -".type vgMemCheck_helperc_LOADV64le, @function\n" -"vgMemCheck_helperc_LOADV64le:\n" -" test $0x7, %eax\n" -" jne .LLV64LE2\n" /* jump if not aligned */ -" mov %eax, %ecx\n" -" movzwl %ax, %edx\n" -" shr $0x10, %ecx\n" -" mov primary_map(,%ecx,4), %ecx\n" -" shr $0x3, %edx\n" -" movzwl (%ecx,%edx,2), %edx\n" -" cmp $0xaaaa, %edx\n" -" jne .LLV64LE1\n" /* jump if not all defined */ -" xor %eax, %eax\n" /* return 0 in edx:eax */ -" xor %edx, %edx\n" -" ret\n" -".LLV64LE1:\n" -" cmp $0x5555, %edx\n" -" jne .LLV64LE2\n" /* jump if not all undefined */ -" or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */ -" or $0xffffffff, %edx\n" -" ret\n" -".LLV64LE2:\n" -" xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */ -" mov $64, %edx\n" -" jmp mc_LOADVn_slow\n" -".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n" -".previous\n" -); +/* See mc_main_asm.c */ #else // Generic for all platforms except {arm32,x86}-linux and x86-solaris @@ -5064,71 +5007,11 @@ VG_REGPARM(1) UWord MC_(helperc_LOADV32be) ( Addr a ) // Non-generic assembly for arm32-linux #if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ && defined(VGP_arm_linux) -__asm__( /* Derived from NCode template */ -".text \n" -".align 2 \n" -".global vgMemCheck_helperc_LOADV32le \n" -".type vgMemCheck_helperc_LOADV32le, %function \n" -"vgMemCheck_helperc_LOADV32le: \n" -" tst r0, #3 \n" // 1 -" movw r3, #:lower16:primary_map \n" // 1 -" bne .LLV32LEc4 \n" // 2 if misaligned -" lsr r2, r0, #16 \n" // 3 -" movt r3, #:upper16:primary_map \n" // 3 -" ldr r2, [r3, r2, lsl #2] \n" // 4 -" uxth r1, r0 \n" // 4 -" ldrb r1, [r2, r1, lsr #2] \n" // 5 -" cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED -" bne .LLV32LEc0 \n" // 7 if !all_defined -" mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED -" bx lr \n" // 9 -".LLV32LEc0: \n" -" cmp r1, #0x55 \n" // 0x55 
== VA_BITS8_UNDEFINED -" bne .LLV32LEc4 \n" // if !all_undefined -" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED -" bx lr \n" -".LLV32LEc4: \n" -" push {r4, lr} \n" -" mov r2, #0 \n" -" mov r1, #32 \n" -" bl mc_LOADVn_slow \n" -" pop {r4, pc} \n" -".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n" -".previous\n" -); +/* See mc_main_asm.c */ #elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ && (defined(VGP_x86_linux) || defined(VGP_x86_solaris)) -__asm__( -".text\n" -".align 16\n" -".global vgMemCheck_helperc_LOADV32le\n" -".type vgMemCheck_helperc_LOADV32le, @function\n" -"vgMemCheck_helperc_LOADV32le:\n" -" test $0x3, %eax\n" -" jnz .LLV32LE2\n" /* jump if misaligned */ -" mov %eax, %edx\n" -" shr $16, %edx\n" -" mov primary_map(,%edx,4), %ecx\n" -" movzwl %ax, %edx\n" -" shr $2, %edx\n" -" movzbl (%ecx,%edx,1), %edx\n" -" cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */ -" jne .LLV32LE1\n" /* jump if not completely defined */ -" xor %eax, %eax\n" /* else return V_BITS32_DEFINED */ -" ret\n" -".LLV32LE1:\n" -" cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */ -" jne .LLV32LE2\n" /* jump if not completely undefined */ -" or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */ -" ret\n" -".LLV32LE2:\n" -" xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */ -" mov $32, %edx\n" -" jmp mc_LOADVn_slow\n" -".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n" -".previous\n" -); +/* See mc_main_asm.c */ #else // Generic for all platforms except {arm32,x86}-linux and x86-solaris diff --git a/memcheck/mc_main_asm.c b/memcheck/mc_main_asm.c new file mode 100644 index 0000000000..a853ccd4c4 --- /dev/null +++ b/memcheck/mc_main_asm.c @@ -0,0 +1,204 @@ +/* -*- mode: C; c-basic-offset: 3; -*- */ + +/*--------------------------------------------------------------------*/ +/*--- MemCheck: some non-generic asm implementations of mc_main.c */ +/*--- functions ---*/ +/*--- mc_main_asm.c ---*/ 
+/*--------------------------------------------------------------------*/ + +/* + This file is part of MemCheck, a heavyweight Valgrind tool for + detecting memory errors. + + Copyright (C) 2000-2018 Julian Seward + jseward@acm.org + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307, USA. + + The GNU General Public License is contained in the file COPYING. +*/ + +/* Having these in mc_main.c gives undefined references at link time, + when compiling with lto. Having them in a separate file solves this. + Also, for some toolchain, we might maybe need to disable lto. 
*/ + +// A bunch of include only needed for mc_include.h +#include "pub_tool_basics.h" +#include "pub_tool_poolalloc.h" +#include "pub_tool_hashtable.h" +#include "pub_tool_tooliface.h" + +#include "mc_include.h" + +// Non-generic assembly for arm32-linux +#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ + && defined(VGP_arm_linux) +__asm__( /* Derived from the 32 bit assembly helper */ +".text \n" +".align 2 \n" +".global vgMemCheck_helperc_LOADV64le \n" +".type vgMemCheck_helperc_LOADV64le, %function \n" +"vgMemCheck_helperc_LOADV64le: \n" +" tst r0, #7 \n" +" movw r3, #:lower16:primary_map \n" +" bne .LLV64LEc4 \n" // if misaligned +" lsr r2, r0, #16 \n" +" movt r3, #:upper16:primary_map \n" +" ldr r2, [r3, r2, lsl #2] \n" +" uxth r1, r0 \n" // r1 is 0-(16)-0 X-(13)-X 000 +" movw r3, #0xAAAA \n" +" lsr r1, r1, #2 \n" // r1 is 0-(16)-0 00 X-(13)-X 0 +" ldrh r1, [r2, r1] \n" +" cmp r1, r3 \n" // 0xAAAA == VA_BITS16_DEFINED +" bne .LLV64LEc0 \n" // if !all_defined +" mov r1, #0x0 \n" // 0x0 == V_BITS32_DEFINED +" mov r0, #0x0 \n" // 0x0 == V_BITS32_DEFINED +" bx lr \n" +".LLV64LEc0: \n" +" movw r3, #0x5555 \n" +" cmp r1, r3 \n" // 0x5555 == VA_BITS16_UNDEFINED +" bne .LLV64LEc4 \n" // if !all_undefined +" mov r1, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED +" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED +" bx lr \n" +".LLV64LEc4: \n" +" push {r4, lr} \n" +" mov r2, #0 \n" +" mov r1, #64 \n" +" bl mc_LOADVn_slow \n" +" pop {r4, pc} \n" +".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le \n" +".previous\n" +); + +#elif ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ + && (defined(VGP_x86_linux) || defined(VGP_x86_solaris)) +__asm__( +".text\n" +".align 16\n" +".global vgMemCheck_helperc_LOADV64le\n" +".type vgMemCheck_helperc_LOADV64le, @function\n" +"vgMemCheck_helperc_LOADV64le:\n" +" test $0x7, %eax\n" +" jne .LLV64LE2\n" /* jump if not aligned */ +" mov %eax, %ecx\n" +" movzwl %ax, %edx\n" +" shr $0x10, 
%ecx\n" +" mov primary_map(,%ecx,4), %ecx\n" +" shr $0x3, %edx\n" +" movzwl (%ecx,%edx,2), %edx\n" +" cmp $0xaaaa, %edx\n" +" jne .LLV64LE1\n" /* jump if not all defined */ +" xor %eax, %eax\n" /* return 0 in edx:eax */ +" xor %edx, %edx\n" +" ret\n" +".LLV64LE1:\n" +" cmp $0x5555, %edx\n" +" jne .LLV64LE2\n" /* jump if not all undefined */ +" or $0xffffffff, %eax\n" /* else return all bits set in edx:eax */ +" or $0xffffffff, %edx\n" +" ret\n" +".LLV64LE2:\n" +" xor %ecx, %ecx\n" /* tail call to mc_LOADVn_slow(a, 64, 0) */ +" mov $64, %edx\n" +" jmp mc_LOADVn_slow\n" +".size vgMemCheck_helperc_LOADV64le, .-vgMemCheck_helperc_LOADV64le\n" +".previous\n" +); + +#else +// Generic for all platforms except {arm32,x86}-linux and x86-solaris +// is in mc_main.c +#endif + + +// Non-generic assembly for arm32-linux +#if ENABLE_ASSEMBLY_HELPERS && defined(PERF_FAST_LOADV) \ + && defined(VGP_arm_linux) +__asm__( /* Derived from NCode template */ +".text \n" +".align 2 \n" +".global vgMemCheck_helperc_LOADV32le \n" +".type vgMemCheck_helperc_LOADV32le, %function \n" +"vgMemCheck_helperc_LOADV32le: \n" +" tst r0, #3 \n" // 1 +" movw r3, #:lower16:primary_map \n" // 1 +" bne .LLV32LEc4 \n" // 2 if misaligned +" lsr r2, r0, #16 \n" // 3 +" movt r3, #:upper16:primary_map \n" // 3 +" ldr r2, [r3, r2, lsl #2] \n" // 4 +" uxth r1, r0 \n" // 4 +" ldrb r1, [r2, r1, lsr #2] \n" // 5 +" cmp r1, #0xAA \n" // 6 0xAA == VA_BITS8_DEFINED +" bne .LLV32LEc0 \n" // 7 if !all_defined +" mov r0, #0x0 \n" // 8 0x0 == V_BITS32_DEFINED +" bx lr \n" // 9 +".LLV32LEc0: \n" +" cmp r1, #0x55 \n" // 0x55 == VA_BITS8_UNDEFINED +" bne .LLV32LEc4 \n" // if !all_undefined +" mov r0, #0xFFFFFFFF \n" // 0xFFFFFFFF == V_BITS32_UNDEFINED +" bx lr \n" +".LLV32LEc4: \n" +" push {r4, lr} \n" +" mov r2, #0 \n" +" mov r1, #32 \n" +" bl mc_LOADVn_slow \n" +" pop {r4, pc} \n" +".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le \n" +".previous\n" +); + +#elif ENABLE_ASSEMBLY_HELPERS && 
defined(PERF_FAST_LOADV) \ + && (defined(VGP_x86_linux) || defined(VGP_x86_solaris)) +__asm__( +".text\n" +".align 16\n" +".global vgMemCheck_helperc_LOADV32le\n" +".type vgMemCheck_helperc_LOADV32le, @function\n" +"vgMemCheck_helperc_LOADV32le:\n" +" test $0x3, %eax\n" +" jnz .LLV32LE2\n" /* jump if misaligned */ +" mov %eax, %edx\n" +" shr $16, %edx\n" +" mov primary_map(,%edx,4), %ecx\n" +" movzwl %ax, %edx\n" +" shr $2, %edx\n" +" movzbl (%ecx,%edx,1), %edx\n" +" cmp $0xaa, %edx\n" /* compare to VA_BITS8_DEFINED */ +" jne .LLV32LE1\n" /* jump if not completely defined */ +" xor %eax, %eax\n" /* else return V_BITS32_DEFINED */ +" ret\n" +".LLV32LE1:\n" +" cmp $0x55, %edx\n" /* compare to VA_BITS8_UNDEFINED */ +" jne .LLV32LE2\n" /* jump if not completely undefined */ +" or $0xffffffff, %eax\n" /* else return V_BITS32_UNDEFINED */ +" ret\n" +".LLV32LE2:\n" +" xor %ecx, %ecx\n" /* tail call mc_LOADVn_slow(a, 32, 0) */ +" mov $32, %edx\n" +" jmp mc_LOADVn_slow\n" +".size vgMemCheck_helperc_LOADV32le, .-vgMemCheck_helperc_LOADV32le\n" +".previous\n" +); + +#else +// Generic for all platforms except {arm32,x86}-linux and x86-solaris +// is in mc_main.c +#endif + +/*--------------------------------------------------------------------*/ +/*--- end ---*/ +/*--------------------------------------------------------------------*/ diff --git a/none/Makefile.am b/none/Makefile.am index 753687eecb..5d8b72ee98 100644 --- a/none/Makefile.am +++ b/none/Makefile.am @@ -17,7 +17,7 @@ none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_SOURCES = \ $(NONE_SOURCES_COMMON) none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) -none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = \ +none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_PRI_CAPS@) none_@VGCONF_ARCH_PRI@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_PRI_CAPS@) @@ -37,7 +37,7 @@ none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_SOURCES = \ $(NONE_SOURCES_COMMON) 
none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CPPFLAGS = \ $(AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) -none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = \ +none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_CFLAGS = $(LTO_CFLAGS) \ $(AM_CFLAGS_@VGCONF_PLATFORM_SEC_CAPS@) none_@VGCONF_ARCH_SEC@_@VGCONF_OS@_DEPENDENCIES = \ $(TOOL_DEPENDENCIES_@VGCONF_PLATFORM_SEC_CAPS@)