+2014-03-03 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/xmmintrin.h (enum _mm_hint) <_MM_HINT_ET0>: Correct
+ hint value.
+ (_mm_prefetch): Move out of GCC target("sse") pragma.
+ * config/i386/prfchwintrin.h (_m_prefetchw): Move out of
+ GCC target("prfchw") pragma.
+ * config/i386/i386.md (prefetch): Emit prefetchwt1 only
+ for locality <= 2.
+ * config/i386/i386.c (ix86_option_override_internal): Enable
+ -mprfchw with -mprefetchwt1.
+
2014-03-03 Joern Rennecke <joern.rennecke@embecosm.com>
* config/arc/arc.md (casesi_load) <length attribute alternative 0>:
* doc/avr-mmcu.texi: Regenerate.
2014-03-03 Tobias Grosser <tobias@grosser.es>
- Mircea Namolaru <mircea.namolaru@inria.fr>
+ Mircea Namolaru <mircea.namolaru@inria.fr>
PR tree-optimization/58028
* graphite-clast-to-gimple.c (set_cloog_options): Don't remove
|| (TARGET_PRFCHW && !TARGET_3DNOW_P (opts->x_ix86_isa_flags)))
x86_prefetch_sse = true;
- /* Enable prefetch{,w} instructions for -m3dnow. */
- if (TARGET_3DNOW_P (opts->x_ix86_isa_flags))
+ /* Enable prefetch{,w} instructions for -m3dnow and -mprefetchwt1. */
+ if (TARGET_3DNOW_P (opts->x_ix86_isa_flags)
+ || TARGET_PREFETCHWT1_P (opts->x_ix86_isa_flags))
opts->x_ix86_isa_flags
|= OPTION_MASK_ISA_PRFCHW & ~opts->x_ix86_isa_flags_explicit;
supported by SSE counterpart or the SSE prefetch is not available
(K6 machines). Otherwise use SSE prefetch as it allows specifying
of locality. */
- if (TARGET_PREFETCHWT1 && write)
+ if (TARGET_PREFETCHWT1 && write && locality <= 2)
operands[2] = const2_rtx;
else if (TARGET_PRFCHW && (write || !TARGET_PREFETCH_SSE))
operands[2] = GEN_INT (3);
# error "Never use <prfchwintrin.h> directly; include <x86intrin.h> or <mm3dnow.h> instead."
#endif
-
#ifndef _PRFCHWINTRIN_H_INCLUDED
#define _PRFCHWINTRIN_H_INCLUDED
-#ifndef __PRFCHW__
-#pragma GCC push_options
-#pragma GCC target("prfchw")
-#define __DISABLE_PRFCHW__
-#endif /* __PRFCHW__ */
-
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
-#ifdef __DISABLE_PRFCHW__
-#undef __DISABLE_PRFCHW__
-#pragma GCC pop_options
-#endif /* __DISABLE_PRFCHW__ */
-
#endif /* _PRFCHWINTRIN_H_INCLUDED */
/* Get _mm_malloc () and _mm_free (). */
#include <mm_malloc.h>
+/* Constants for use with _mm_prefetch.  Each value packs two fields:
+   bits 0-1 hold the locality level and bit 2 (0x4) holds the
+   read/write flag, matching how _mm_prefetch splits its selector.  */
+enum _mm_hint
+{
+ /* _MM_HINT_ETx is _MM_HINT_Tx with bit 2 (0x4) set.  */
+ _MM_HINT_ET0 = 7,
+ _MM_HINT_ET1 = 6,
+ _MM_HINT_T0 = 3,
+ _MM_HINT_T1 = 2,
+ _MM_HINT_T2 = 1,
+ _MM_HINT_NTA = 0
+};
+
+/* Loads one cache line from address P to a location "closer" to the
+   processor.  The selector I specifies the type of prefetch operation:
+   bit 2 (0x4) is passed to __builtin_prefetch as the read/write flag
+   and bits 0-1 as the locality level.  */
+#ifdef __OPTIMIZE__
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_prefetch (const void *__P, enum _mm_hint __I)
+{
+ __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
+}
+#else
+/* Macro form used when __OPTIMIZE__ is not defined.  I is parenthesized
+   so that a selector expression built with an operator of lower
+   precedence than `&' (for example `a | b') is masked as a whole.  */
+#define _mm_prefetch(P, I) \
+ __builtin_prefetch ((P), (((I) & 0x4) >> 2), ((I) & 0x3))
+#endif
+
#ifndef __SSE__
#pragma GCC push_options
#pragma GCC target("sse")
#define _MM_SHUFFLE(fp3,fp2,fp1,fp0) \
(((fp3) << 6) | ((fp2) << 4) | ((fp1) << 2) | (fp0))
-/* Constants for use with _mm_prefetch. */
-enum _mm_hint
-{
- /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */
- _MM_HINT_ET0 = 5,
- _MM_HINT_ET1 = 6,
- _MM_HINT_T0 = 3,
- _MM_HINT_T1 = 2,
- _MM_HINT_T2 = 1,
- _MM_HINT_NTA = 0
-};
-
/* Bits in the MXCSR. */
#define _MM_EXCEPT_MASK 0x003f
#define _MM_EXCEPT_INVALID 0x0001
return _mm_sad_pu8 (__A, __B);
}
-/* Loads one cache line from address P to a location "closer" to the
- processor. The selector I specifies the type of prefetch operation. */
-#ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_prefetch (const void *__P, enum _mm_hint __I)
-{
- __builtin_prefetch (__P, (__I & 0x4) >> 2, __I & 0x3);
-}
-#else
-#define _mm_prefetch(P, I) \
- __builtin_prefetch ((P), ((I & 0x4) >> 2), (I & 0x3))
-#endif
-
/* Stores the data in A to the address P without polluting the caches. */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_pi (__m64 *__P, __m64 __A)