Ensure that m256 is 32-aligned on non-avx2 builds

author Matthew Barr <matthew.barr@intel.com>

Thu, 7 Jul 2016 04:00:11 +0000 (14:00 +1000)

committer Matthew Barr <matthew.barr@intel.com>

Wed, 10 Aug 2016 04:52:56 +0000 (14:52 +1000)
author Matthew Barr <matthew.barr@intel.com>
Thu, 7 Jul 2016 04:00:11 +0000 (14:00 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Wed, 10 Aug 2016 04:52:56 +0000 (14:52 +1000)
diff --git a/src/ue2common.h b/src/ue2common.h

index 2de607532e36646bada59a9dc78e8ff02bcb54e2..e1f03f721b639d6c80a8a282529be4520b0801d9 100644 (file)
--- a/src/ue2common.h
+++ b/src/ue2common.h
@@ -52,6 +52,9 @@
  #define ALIGN_ATTR(x) __attribute__((aligned((x))))
  #endif
  
+#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
+#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
+#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
  
  typedef signed char s8;
  typedef unsigned char u8;
@@ -82,10 +85,6 @@ typedef u32 ReportID;
  #define HS_PUBLIC_API
  #endif
  
-#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
-#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
-#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
-
  #define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
  
  /** \brief Shorthand for the attribute to shut gcc about unused parameters */
diff --git a/src/util/simd_types.h b/src/util/simd_types.h

index 63311b10fa67b223baa275e16bd79e056211f389..e454141116fbffb53d036298e5316999a6e3533f 100644 (file)
--- a/src/util/simd_types.h
+++ b/src/util/simd_types.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -65,7 +65,7 @@ typedef __m128i m128;
  #if defined(__AVX2__)
  typedef __m256i m256;
  #else
-typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
+typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
  #endif
  
  // these should align to 16 and 32 respectively
diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h

index 5f557ba511b6fd7ea5641952b6a4d321cfd934bd..8cea458e442d3ec7c8128a06425f2f7d0da8a526 100644 (file)
--- a/src/util/simd_utils.h
+++ b/src/util/simd_utils.h
@@ -493,11 +493,10 @@ static really_inline u32 diffrich64_256(m256 a, m256 b) {
  
  // aligned load
  static really_inline m256 load256(const void *ptr) {
-#if defined(__AVX2__)
      assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
      return _mm256_load_si256((const m256 *)ptr);
  #else
-    assert(ISALIGNED_N(ptr, alignof(m128)));
      m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
      return rv;
  #endif
@@ -517,11 +516,10 @@ static really_inline m256 load2x128(const void *ptr) {
  
  // aligned store
  static really_inline void store256(void *ptr, m256 a) {
-#if defined(__AVX2__)
      assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
      _mm256_store_si256((m256 *)ptr, a);
  #else
-    assert(ISALIGNED_16(ptr));
      ptr = assume_aligned(ptr, 16);
      *(m256 *)ptr = a;
  #endif
@@ -943,19 +941,19 @@ static really_inline u32 diffrich64_512(m512 a, m512 b) {
  
  // aligned load
  static really_inline m512 load512(const void *ptr) {
-    assert(ISALIGNED_16(ptr));
+    assert(ISALIGNED_N(ptr, alignof(m256)));
      m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
      return rv;
  }
  
  // aligned store
  static really_inline void store512(void *ptr, m512 a) {
+    assert(ISALIGNED_N(ptr, alignof(m256)));
  #if defined(__AVX2__)
      m512 *x = (m512 *)ptr;
      store256(&x->lo, a.lo);
      store256(&x->hi, a.hi);
  #else
-    assert(ISALIGNED_16(ptr));
      ptr = assume_aligned(ptr, 16);
      *(m512 *)ptr = a;
  #endif
author	Matthew Barr <matthew.barr@intel.com>
	Thu, 7 Jul 2016 04:00:11 +0000 (14:00 +1000)
committer	Matthew Barr <matthew.barr@intel.com>
	Wed, 10 Aug 2016 04:52:56 +0000 (14:52 +1000)
src/ue2common.h		patch | blob | blame | history
src/util/simd_types.h		patch | blob | blame | history
src/util/simd_utils.h		patch | blob | blame | history