/* Unaligned version of the same types. */
typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef double double_u __attribute__ ((__may_alias__, __aligned__ (1))); /* Scalar double declared may_alias with 1-byte alignment; the scalar load/store intrinsics below access memory through it. NOTE(review): unlike the neighbouring typedefs this name has no __ prefix, so it is visible in the user namespace -- confirm that is intended. */
/* Create a selector for use with the SHUFPD instruction. */
#define _MM_SHUFFLE2(fp1,fp0) \
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_sd (double const *__P)
{
- return _mm_set_sd (*__P);
+ /* Result is { *__P, 0.0 }. *__P is read through double_u (may_alias,
+ 1-byte alignment) so __P need not be aligned for double; the cast keeps
+ the const qualifier of __P instead of silently dropping it. */
+ return __extension__ (__m128d) { *(const double_u *)__P, 0.0 };
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_sd (double *__P, __m128d __A)
{
- *__P = ((__v2df)__A)[0];
+ *(double_u *)__P = ((__v2df)__A)[0] ; /* Store element 0 of __A through double_u (may_alias, 1-byte alignment) so __P need not be aligned for double. */
}
extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__))
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_storel_pd (double *__P, __m128d __A)
{
- _mm_store_sd (__P, __A);
+ /* Store element 0 of __A to *__P. The write goes through double_u
+ (may_alias, 1-byte alignment), exactly as _mm_store_sd does, so inlining
+ the former call does not reintroduce a requirement that __P be aligned
+ for double. */
+ *(double_u *)__P = ((__v2df)__A)[0];
}
/* Stores the upper DPFP value. */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadh_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadhpd ((__v2df)__A, __B);
+ /* Result is { __A[0], *__B }. *__B is read through double_u (may_alias,
+ 1-byte alignment) so __B need not be aligned for double; the cast keeps
+ the const qualifier of __B. */
+ return __extension__ (__m128d) { ((__v2df)__A)[0], *(const double_u *)__B };
}
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadl_pd (__m128d __A, double const *__B)
{
- return (__m128d)__builtin_ia32_loadlpd ((__v2df)__A, __B);
+ /* Result is { *__B, __A[1] }. *__B is read through double_u (may_alias,
+ 1-byte alignment) so __B need not be aligned for double; the cast keeps
+ the const qualifier of __B. */
+ return __extension__ (__m128d) { *(const double_u *)__B, ((__v2df)__A)[1] };
}
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
/* Unaligned version of the same type. */
typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1)));
+typedef float float_u __attribute__ ((__may_alias__, __aligned__ (1))); /* Scalar float declared may_alias with 1-byte alignment; _mm_load_ss and _mm_store_ss access memory through it. NOTE(review): unlike the neighbouring typedefs this name has no __ prefix, so it is visible in the user namespace -- confirm that is intended. */
/* Internal data types for implementing the intrinsics. */
typedef float __v4sf __attribute__ ((__vector_size__ (16)));
/* Sets the upper two SPFP values with 64-bits of data loaded from P;
the lower two values are passed through from A. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadh_pi (__m128 __A, __m64 const *__P)
+_mm_loadh_pi (__m128 __A, __m64_u const *__P) /* NOTE(review): the parameter type changes from __m64 to __m64_u, which is not declared in the visible part of this file -- presumably a 1-byte-aligned variant of __m64; confirm it is in scope here and that callers passing __m64 const * still compile (C and C++). */
{
return (__m128) __builtin_ia32_loadhps ((__v4sf)__A, (const __v2sf *)__P);
}
/* Sets the lower two SPFP values with 64-bits of data loaded from P;
the upper two values are passed through from A. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_loadl_pi (__m128 __A, __m64 const *__P)
+_mm_loadl_pi (__m128 __A, __m64_u const *__P) /* NOTE(review): the parameter type changes from __m64 to __m64_u, which is not declared in the visible part of this file -- presumably a 1-byte-aligned variant of __m64; confirm it is in scope here and that callers passing __m64 const * still compile (C and C++). */
{
return (__m128) __builtin_ia32_loadlps ((__v4sf)__A, (const __v2sf *)__P);
}
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_load_ss (float const *__P)
{
- return _mm_set_ss (*__P);
+ /* Result is { *__P, 0, 0, 0 }. *__P is read through float_u (may_alias,
+ 1-byte alignment) so __P need not be aligned for float; the cast keeps
+ the const qualifier of __P instead of silently dropping it. */
+ return __extension__ (__m128) (__v4sf){ *(const float_u *)__P, 0.0f, 0.0f, 0.0f };
}
/* Create a vector with all four elements equal to *P. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_store_ss (float *__P, __m128 __A)
{
- *__P = ((__v4sf)__A)[0];
+ *(float_u *)__P = ((__v4sf)__A)[0]; /* Store element 0 of __A through float_u (may_alias, 1-byte alignment) so __P need not be aligned for float. */
}
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
+#include <emmintrin.h>
+
+/* Exercise _mm_load_sd/_mm_store_sd with a double pointer that is not
+ 8-byte aligned. -fno-sanitize-recover makes any sanitizer report abort
+ the program, so a misalignment diagnostic actually fails this run test
+ instead of being printed and ignored. */
+int main()
+{
+ /* Pin the buffer to an 8-byte boundary so t+1 is misaligned for double
+ regardless of stack layout, and zero it so the load reads defined
+ bytes. */
+ unsigned char t[16+1] __attribute__ ((__aligned__ (8))) = { 0 };
+ __m128d x = _mm_load_sd((const double *)(t+1));
+ _mm_store_sd((double*)(t+1), x);
+ return 0;
+}
--- /dev/null
+/* { dg-do run { target { ! ia32 } } } */
+/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
+#include <emmintrin.h>
+
+/* Exercise _mm_load_ss/_mm_store_ss with a float pointer that is not
+ 4-byte aligned. -fno-sanitize-recover makes any sanitizer report abort
+ the program, so a misalignment diagnostic actually fails this run test
+ instead of being printed and ignored. */
+int main()
+{
+ /* A bare 9-byte char array carries no alignment guarantee, so using `t'
+ directly leaves the misalignment to chance. Pin the buffer to a 4-byte
+ boundary and access t+1, which is then always misaligned for float;
+ zero it so the load reads defined bytes. */
+ unsigned char t[8+1] __attribute__ ((__aligned__ (4))) = { 0 };
+ __m128 x = _mm_load_ss((const float *)(t+1));
+ _mm_store_ss((float*)(t+1), x);
+ return 0;
+}