/* Copyright (C) 2009, 2011 Free Software Foundation, Inc.
   Contributed by Richard Henderson <rth@redhat.com>.

   This file is part of the GNU Transactional Memory Library (libitm).

   Libitm is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or
   (at your option) any later version.

   Libitm is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
   FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
   more details.

   Under Section 7 of GPL version 3, you are granted additional
   permissions described in the GCC Runtime Library Exception, version
   3.1, as published by the Free Software Foundation.

   You should have received a copy of the GNU General Public License and
   a copy of the GCC Runtime Library Exception along with this program;
   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
   <http://www.gnu.org/licenses/>.  */
#ifndef LIBITM_X86_UNALIGNED_H
#define LIBITM_X86_UNALIGNED_H 1

/* Advertise the arch-specific two-cacheline load specializations below
   so the generic header does not emit its byte-copy fallbacks.  */
#define HAVE_ARCH_UNALIGNED_LOAD2_U4 1
#define HAVE_ARCH_UNALIGNED_LOAD2_U8 1

#include "config/generic/unaligned.h"

namespace GTM HIDDEN {
37 unaligned_load2
<uint32_t>(const gtm_cacheline
*c1
,
38 const gtm_cacheline
*c2
, size_t ofs
)
41 lo
= c1
->u32
[CACHELINE_SIZE
/ sizeof(uint32_t) - 1];
43 asm("shrd %b2, %1, %0" : "=r"(r
) : "r"(hi
), "c"((ofs
& 3) * 8), "0"(lo
));
49 unaligned_load2
<uint64_t>(const gtm_cacheline
*c1
,
50 const gtm_cacheline
*c2
, size_t ofs
)
54 lo
= c1
->u64
[CACHELINE_SIZE
/ sizeof(uint64_t) - 1];
56 asm("shrd %b2, %1, %0" : "=r"(r
) : "r"(hi
), "c"((ofs
& 3) * 8), "0"(lo
));
62 if (ofs
< CACHELINE_SIZE
- 4)
64 v0
= c1
->u32
[CACHELINE_SIZE
/ sizeof(uint32_t) - 2];
65 v1
= c1
->u32
[CACHELINE_SIZE
/ sizeof(uint32_t) - 1];
70 v0
= c1
->u32
[CACHELINE_SIZE
/ sizeof(uint32_t) - 1];
75 asm("shrd %%cl, %[v1], %[v0]; shrd %%cl, %[v2], %[v1]"
76 : "=A"(r
) : "c"(ofs
), [v0
] "a"(v0
), [v1
] "d"(v1
), [v2
] "r"(v2
));
82 #if defined(__SSE2__) || defined(__MMX__)
85 unaligned_load2
<_ITM_TYPE_M64
>(const gtm_cacheline
*c1
,
86 const gtm_cacheline
*c2
, size_t ofs
)
89 __m128i lo
= _mm_movpi64_epi64 (c1
->m64
[CACHELINE_SIZE
/ 8 - 1]);
90 __m128i hi
= _mm_movpi64_epi64 (c2
->m64
[0]);
93 lo
= _mm_srli_epi64 (lo
, ofs
);
94 hi
= _mm_slli_epi64 (hi
, 64 - ofs
);
96 return _mm_movepi64_pi64 (lo
);
98 // On 32-bit we're about to return the result in an MMX register, so go
99 // ahead and do the computation in that unit, even if SSE2 is available.
100 __m64 lo
= c1
->m64
[CACHELINE_SIZE
/ 8 - 1];
101 __m64 hi
= c2
->m64
[0];
104 lo
= _mm_srli_si64 (lo
, ofs
);
105 hi
= _mm_slli_si64 (hi
, 64 - ofs
);
109 #endif // SSE2 or MMX
111 // The SSE types are strictly aligned.
114 struct strict_alignment
<_ITM_TYPE_M128
>
115 : public std::true_type
118 // Expand the unaligned SSE move instructions.
120 inline _ITM_TYPE_M128
121 unaligned_load
<_ITM_TYPE_M128
>(const void *t
)
123 return _mm_loadu_ps (static_cast<const float *>(t
));
128 unaligned_store
<_ITM_TYPE_M128
>(void *t
, _ITM_TYPE_M128 val
)
130 _mm_storeu_ps (static_cast<float *>(t
), val
);
#ifdef __AVX__
// The AVX types are strictly aligned when it comes to vmovaps vs vmovups.
template<>
  struct strict_alignment<_ITM_TYPE_M256>
    : public std::true_type
  { };

/* Unaligned load of an _ITM_TYPE_M256 via vmovups.  */
template<>
inline _ITM_TYPE_M256
unaligned_load<_ITM_TYPE_M256>(const void *t)
{
  return _mm256_loadu_ps (static_cast<const float *>(t));
}

/* Unaligned store of an _ITM_TYPE_M256 via vmovups.  */
template<>
inline void
unaligned_store<_ITM_TYPE_M256>(void *t, _ITM_TYPE_M256 val)
{
  _mm256_storeu_ps (static_cast<float *>(t), val);
}
#endif // AVX
/* realign_m128i concatenates HI:LO (32 bytes) and returns the 16 bytes
   starting at BYTE_COUNT.  Each ISA level below provides the best
   available implementation; the non-XOP variants tail into per-offset
   code stubs (defined elsewhere in libitm) through a table indexed by
   BYTE_COUNT, using explicit-register variables to pin the operands to
   the registers those stubs expect.  */
#ifdef __XOP__
# define HAVE_ARCH_REALIGN_M128I 1
extern const __v16qi GTM_vpperm_shift[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
  /* XOP's VPPERM selects bytes from both sources in one instruction.  */
  return _mm_perm_epi8 (lo, hi, GTM_vpperm_shift[byte_count]);
}
#elif defined(__AVX__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const uint64_t GTM_vpalignr_table[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
  register __m128i xmm0 __asm__("xmm0") = hi;
  register __m128i xmm1 __asm__("xmm1") = lo;
  /* Indirect call to the vpalignr stub for this byte offset.  */
  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
	"r"(&GTM_vpalignr_table[byte_count]));
  return xmm0;
}
#elif defined(__SSSE3__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const uint64_t GTM_palignr_table[16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
  register __m128i xmm0 __asm__("xmm0") = hi;
  register __m128i xmm1 __asm__("xmm1") = lo;
  /* Indirect call to the palignr stub for this byte offset.  */
  __asm("call *%2" : "+x"(xmm0) : "x"(xmm1),
	"r"(&GTM_palignr_table[byte_count]));
  return xmm0;
}
#elif defined(__SSE2__)
# define HAVE_ARCH_REALIGN_M128I 1
extern "C" const char GTM_pshift_table[16 * 16];
inline __m128i
realign_m128i (__m128i lo, __m128i hi, unsigned byte_count)
{
  register __m128i xmm0 __asm__("xmm0") = lo;
  register __m128i xmm1 __asm__("xmm1") = hi;
  /* SSE2 stubs shift both halves and OR them; 16 bytes of code per
     offset, hence the byte_count * 16 stride.  */
  __asm("call *%2" : "+x"(xmm0), "+x"(xmm1)
	: "r"(GTM_pshift_table + byte_count * 16));
  return xmm0;
}
#endif // XOP, AVX, SSSE3, SSE2
#ifdef HAVE_ARCH_REALIGN_M128I
/* Load an _ITM_TYPE_M128 that spans the boundary between cachelines C1
   and C2, by realigning the last 16 bytes of C1 with the first 16
   bytes of C2 at byte offset OFS & 15.  */
template<>
inline _ITM_TYPE_M128
unaligned_load2<_ITM_TYPE_M128>(const gtm_cacheline *c1,
				const gtm_cacheline *c2, size_t ofs)
{
  return (_ITM_TYPE_M128)
    realign_m128i (c1->m128i[CACHELINE_SIZE / 16 - 1],
		   c2->m128i[0], ofs & 15);
}
#endif // HAVE_ARCH_REALIGN_M128I
#ifdef __AVX__
/* Load an _ITM_TYPE_M256 that spans the boundary between cachelines C1
   and C2; OFS is the byte offset of the value within the line.  The
   M128 specialization above yields the 16 bytes that actually cross
   the line boundary; the other 16 bytes lie entirely inside one line
   and can be fetched with a plain unaligned load.  */
template<>
inline _ITM_TYPE_M256
unaligned_load2<_ITM_TYPE_M256>(const gtm_cacheline *c1,
				const gtm_cacheline *c2, size_t ofs)
{
  __m128i v0, v1;
  __m256i r;

  v0 = (__m128i) unaligned_load2<_ITM_TYPE_M128>(c1, c2, ofs);
  if (ofs < CACHELINE_SIZE - 16)
    /* The high half crosses the boundary; the low half is within C1.  */
    v1 = v0, v0 = _mm_loadu_si128 ((const __m128i *) &c1->b[ofs]);
  else
    /* The low half crosses the boundary; the high half is within C2.  */
    v1 = _mm_loadu_si128 ((const __m128i *) &c2->b[ofs + 16 - CACHELINE_SIZE]);

  r = _mm256_castsi128_si256 ((__m128i) v0);
  r = _mm256_insertf128_si256 (r, (__m128i) v1, 1);
  return (_ITM_TYPE_M256) r;
}
#endif // AVX
} // namespace GTM

#endif // LIBITM_X86_UNALIGNED_H