From: Matthew Barr Date: Mon, 6 Mar 2017 22:58:24 +0000 (+1100) Subject: Use intrinsic to get correct movq everywhere X-Git-Tag: v4.5.0^2~193 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=d2416736cb586d380ffb9b1ff3b63194247d7e81;p=thirdparty%2Fvectorscan.git Use intrinsic to get correct movq everywhere The real trick here is that _mm_set_epi64x() (note the 'x') takes a plain 64-bit integer by value - unlike the non-x _mm_set_epi64(), which takes __m64 operands, or _mm_loadl_epi64(), which takes a pointer to a 128-bit value - so compilers don't twist themselves in knots with alignment or whatever confuses them. --- diff --git a/src/util/simd_utils.h b/src/util/simd_utils.h index c6d43f57..484b47c0 100644 --- a/src/util/simd_utils.h +++ b/src/util/simd_utils.h @@ -180,9 +180,7 @@ static really_inline u64a movq(const m128 in) { /* another form of movq */ static really_inline m128 load_m128_from_u64a(const u64a *p) { - m128 out; - __asm__ ("vmovq\t%1,%0" : "=x"(out) :"m"(*p)); - return out; + return _mm_set_epi64x(0LL, *p); } #define rshiftbyte_m128(a, count_immed) _mm_srli_si128(a, count_immed)