#define ALIGN_ATTR(x) __attribute__((aligned((x))))
#endif
+#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
+#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
+#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
typedef signed char s8;
typedef unsigned char u8;
#define HS_PUBLIC_API
#endif
-#define ALIGN_DIRECTIVE ALIGN_ATTR(16)
-#define ALIGN_AVX_DIRECTIVE ALIGN_ATTR(32)
-#define ALIGN_CL_DIRECTIVE ALIGN_ATTR(64)
-
#define ARRAY_LENGTH(a) (sizeof(a)/sizeof((a)[0]))
/** \brief Shorthand for the attribute to shut gcc up about unused parameters */
/*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#if defined(__AVX2__)
typedef __m256i m256;
#else
-typedef struct ALIGN_AVX_DIRECTIVE {m128 lo; m128 hi;} m256;
+typedef ALIGN_AVX_DIRECTIVE struct {m128 lo; m128 hi;} m256;
#endif
// these should align to 16 and 32 respectively
// aligned load
static really_inline m256 load256(const void *ptr) {
-#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
return _mm256_load_si256((const m256 *)ptr);
#else
- assert(ISALIGNED_N(ptr, alignof(m128)));
m256 rv = { load128(ptr), load128((const char *)ptr + 16) };
return rv;
#endif
// aligned store
static really_inline void store256(void *ptr, m256 a) {
-#if defined(__AVX2__)
assert(ISALIGNED_N(ptr, alignof(m256)));
+#if defined(__AVX2__)
_mm256_store_si256((m256 *)ptr, a);
#else
- assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16);
*(m256 *)ptr = a;
#endif
// aligned load
static really_inline m512 load512(const void *ptr) {
- assert(ISALIGNED_16(ptr));
+ assert(ISALIGNED_N(ptr, alignof(m256)));
m512 rv = { load256(ptr), load256((const char *)ptr + 32) };
return rv;
}
// aligned store
static really_inline void store512(void *ptr, m512 a) {
+ assert(ISALIGNED_N(ptr, alignof(m256)));
#if defined(__AVX2__)
m512 *x = (m512 *)ptr;
store256(&x->lo, a.lo);
store256(&x->hi, a.hi);
#else
- assert(ISALIGNED_16(ptr));
ptr = assume_aligned(ptr, 16);
*(m512 *)ptr = a;
#endif