Clear upper half of AVX register before libm call

author Matthew Barr <matthew.barr@intel.com>

Fri, 10 Feb 2017 00:29:42 +0000 (11:29 +1100)

committer Matthew Barr <matthew.barr@intel.com>

Wed, 26 Apr 2017 04:59:22 +0000 (14:59 +1000)
author Matthew Barr <matthew.barr@intel.com>
Fri, 10 Feb 2017 00:29:42 +0000 (11:29 +1100)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 26 Apr 2017 04:59:22 +0000 (14:59 +1000)
diff --git a/CMakeLists.txt b/CMakeLists.txt

index 4ec1f9e64466291d330dc766eecfa7d9aea89969..8329c0ba6e36b4c693d024e84783715f7949be2a 100644 (file)
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -991,6 +991,7 @@ SET (hs_SRCS
      src/util/fatbit_build.h
      src/util/graph.h
      src/util/hash.h
+    src/util/math.h
      src/util/multibit_build.cpp
      src/util/multibit_build.h
      src/util/order_check.h
diff --git a/src/fdr/fdr_compile.cpp b/src/fdr/fdr_compile.cpp

index 953b2dab879bbca6b76059cb0a7e87ae421214e4..f99fcb65ae0beaa35a04efb4ccf9fbc3fe204c46 100644 (file)
--- a/src/fdr/fdr_compile.cpp
+++ b/src/fdr/fdr_compile.cpp
@@ -43,6 +43,7 @@
  #include "util/alloc.h"
  #include "util/compare.h"
  #include "util/dump_mask.h"
+#include "util/math.h"
  #include "util/target_info.h"
  #include "util/ue2string.h"
  #include "util/verify_types.h"
@@ -195,7 +196,7 @@ aligned_unique_ptr<FDR> FDRCompiler::setupFDR() {
  static
  double getScoreUtil(u32 len, u32 count) {
      return len == 0 ? numeric_limits<double>::max()
-                    : pow(count, 1.05) * pow(len, -3.0);
+                    : our_pow(count, 1.05) * our_pow(len, -3.0);
  }
  
  /**
diff --git a/src/util/math.h b/src/util/math.h

new file mode 100644 (file)

index 0000000..80ad492
--- /dev/null
+++ b/src/util/math.h
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 2017, Intel Corporation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ *  * Redistributions of source code must retain the above copyright notice,
+ *    this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of Intel Corporation nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef UTIL_MATH_H_
+#define UTIL_MATH_H_
+
+#include <math.h>
+
+#ifdef __cplusplus
+# if defined(HAVE_CXX_X86INTRIN_H)
+#  define USE_X86INTRIN_H
+# endif
+#else // C
+# if defined(HAVE_C_X86INTRIN_H)
+#  define USE_X86INTRIN_H
+# endif
+#endif
+
+#ifdef __cplusplus
+# if defined(HAVE_CXX_INTRIN_H)
+#  define USE_INTRIN_H
+# endif
+#else // C
+# if defined(HAVE_C_INTRIN_H)
+#  define USE_INTRIN_H
+# endif
+#endif
+
+#if defined(USE_X86INTRIN_H)
+#include <x86intrin.h>
+#elif defined(USE_INTRIN_H)
+#include <intrin.h>
+#endif
+
+static really_inline
+double our_pow(double x, double y) {
+#if defined(__AVX__)
+    /*
+     * Built with AVX only: clear the upper halves of the AVX (YMM) registers
+     * with _mm256_zeroupper() before calling into the math lib's pow(). On
+     * some versions of glibc this can save thousands of AVX-to-SSE transitions.
+     */
+    _mm256_zeroupper();
+#endif
+    return pow(x, y);
+}
+
+#endif // UTIL_MATH_H_
author	Matthew Barr <matthew.barr@intel.com>
	Fri, 10 Feb 2017 00:29:42 +0000 (11:29 +1100)
committer	Matthew Barr <matthew.barr@intel.com>
	Wed, 26 Apr 2017 04:59:22 +0000 (14:59 +1000)
CMakeLists.txt		patch \| blob \| blame \| history
src/fdr/fdr_compile.cpp		patch \| blob \| blame \| history
src/util/math.h	[new file with mode: 0644]	patch \| blob