#include "third_party/xxhash.h"
+// With older GCC (libgcc), __builtin_cpu_supports("avx2) would return true if
+// AVX2 was supported by the CPU but disabled by the OS. This was fixed in GCC
+// 8, 7.4 and 6.5 (see https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85100).
+//
+// For clang it seems to be correct if compiler-rt is used as -rtlib, at least
+// as of 3.9 (see https://bugs.llvm.org/show_bug.cgi?id=25510). But if libgcc
+// is used we have the same problem as above. Unfortunately there doesn't seem
+// to be a way to detect which one is used, or the version of libgcc when used
+// by clang. So assume that it works with clang >= 3.9.
+#if !(__GNUC__ >= 8 || (__GNUC__ == 7 && __GNUC_MINOR__ >= 4) \
+ || (__GNUC__ == 6 && __GNUC_MINOR__ >= 5) || __clang_major__ > 3 \
+ || (__clang_major__ == 3 && __clang_minor__ >= 9))
+# undef HAVE_AVX2
+#endif
+
+#ifdef HAVE_AVX2
+# include <immintrin.h>
+#endif
+
unsigned
hash_from_int(int i)
{
return XXH64(&i, sizeof(int), 0);
}
-// Search for the strings "__DATE__" and "__TIME__" in str.
-//
-// Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
-// HASH_SOURCE_CODE_FOUND_TIME set appropriately.
-int
-check_for_temporal_macros(const char* str, size_t len)
+static int
+check_for_temporal_macros_bmh(const char* str, size_t len)
{
int result = 0;
return result;
}
+#ifdef HAVE_AVX2
+static int check_for_temporal_macros_avx2(const char* str, size_t len)
+ __attribute__((target("avx2")));
+
+// The following algorithm, which uses AVX2 instructions to find __DATE__ and
+// __TIME__, is heavily inspired by http://0x80.pl/articles/simd-strfind.html
+static int
+check_for_temporal_macros_avx2(const char* str, size_t len)
+{
+ int result = 0;
+
+ // Set all 32 bytes in first and last to '_' and 'E' respectively.
+ const __m256i first = _mm256_set1_epi8('_');
+ const __m256i last = _mm256_set1_epi8('E');
+
+ size_t pos = 0;
+ for (; pos + 5 + 32 <= len; pos += 32) {
+ // Load 32 bytes from the current position in the input string, with
+ // block_last being offset 5 bytes (i.e. the offset of 'E' in both macros).
+ const __m256i block_first =
+ _mm256_loadu_si256(reinterpret_cast<const __m256i*>(str + pos));
+ const __m256i block_last =
+ _mm256_loadu_si256(reinterpret_cast<const __m256i*>(str + pos + 5));
+
+ // For i in 0..31:
+ // eq_X[i] = 0xFF if X[i] == block_X[i] else 0
+ const __m256i eq_first = _mm256_cmpeq_epi8(first, block_first);
+ const __m256i eq_last = _mm256_cmpeq_epi8(last, block_last);
+
+ // Set bit i in mask if byte i in both eq_first and eq_last have most
+ // significant bit set.
+ uint32_t mask = _mm256_movemask_epi8(_mm256_and_si256(eq_first, eq_last));
+
+ // A bit set in mask now indicate a possible location for a temporal macro.
+ while (mask != 0) {
+ // The start position + 1 (as we know the first char is _).
+ const auto start = pos + __builtin_ctz(mask) + 1;
+
+ // Clear the least significant bit set.
+ mask = mask & (mask - 1);
+
+ if (start + 7 > len) {
+ break;
+ }
+
+ int found = 0;
+ if (memcmp(str + start, "_DATE__", 7) == 0) {
+ found = HASH_SOURCE_CODE_FOUND_DATE;
+ } else if (memcmp(str + start, "_TIME__", 7) == 0) {
+ found = HASH_SOURCE_CODE_FOUND_TIME;
+ } else {
+ continue;
+ }
+
+ // Check char before and after macro to verify that the found macro isn't
+ // part of another identifier.
+ if ((start == 1 || (str[start - 2] != '_' && !isalnum(str[start - 2])))
+ && (start + 7 == len
+ || (str[start + 7] != '_' && !isalnum(str[start + 7])))) {
+ result |= found;
+ }
+ }
+ }
+
+ result |= check_for_temporal_macros_bmh(str + pos, len - pos);
+
+ return result;
+}
+#endif
+
+// Search for the strings "__DATE__" and "__TIME__" in str.
+//
+// Returns a bitmask with HASH_SOURCE_CODE_FOUND_DATE and
+// HASH_SOURCE_CODE_FOUND_TIME set appropriately.
+int
+check_for_temporal_macros(const char* str, size_t len)
+{
+#ifdef HAVE_AVX2
+ if (__builtin_cpu_supports("avx2")) {
+ return check_for_temporal_macros_avx2(str, len);
+ }
+#endif
+ return check_for_temporal_macros_bmh(str, len);
+}
+
// Hash a string. Returns a bitmask of HASH_SOURCE_CODE_* results.
int
hash_source_code_string(const Config& config,
"#define ab __TIME __\n"
"#define ab __TIME_ _\n";
+ const char temporal_at_avx_boundary[] =
+ "#define alphabet abcdefghijklmnopqrstuvwxyz\n"
+ "__DATE__";
+
CHECK(check_for_temporal_macros(time_start + 0, sizeof(time_start) - 0));
CHECK(!check_for_temporal_macros(time_start + 1, sizeof(time_start) - 1));
CHECK(!check_for_temporal_macros(no_temporal + 5, sizeof(no_temporal) - 5));
CHECK(!check_for_temporal_macros(no_temporal + 6, sizeof(no_temporal) - 6));
CHECK(!check_for_temporal_macros(no_temporal + 7, sizeof(no_temporal) - 7));
+
+ for (size_t i = 0; i < sizeof(temporal_at_avx_boundary) - 8; ++i) {
+ CHECKM(check_for_temporal_macros(temporal_at_avx_boundary + i,
+ sizeof(temporal_at_avx_boundary) - i),
+ temporal_at_avx_boundary + i);
+ }
}
TEST_SUITE_END