extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srli_epi16 (__m512i __A, const int __imm)
+_mm512_srli_epi16 (__m512i __A, const unsigned int __imm)
{
return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
(__v32hi)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi) __A, __imm,
(__v32hi) __W,
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_slli_epi16 (__m512i __A, const int __B)
+_mm512_slli_epi16 (__m512i __A, const unsigned int __B)
{
return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
(__v32hi)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_slli_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
- const int __B)
+ const unsigned int __B)
{
return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
(__v32hi) __W,
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_slli_epi16 (__mmask32 __U, __m512i __A, const int __B)
+_mm512_maskz_slli_epi16 (__mmask32 __U, __m512i __A, const unsigned int __B)
{
return (__m512i) __builtin_ia32_psllwi512_mask ((__v32hi) __A, __B,
(__v32hi)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_srai_epi16 (__m512i __A, const int __imm)
+_mm512_srai_epi16 (__m512i __A, const unsigned int __imm)
{
return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
(__v32hi)
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_srai_epi16 (__m512i __W, __mmask32 __U, __m512i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
(__v32hi) __W,
extern __inline __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_maskz_srai_epi16 (__mmask32 __U, __m512i __A, const int __imm)
+_mm512_maskz_srai_epi16 (__mmask32 __U, __m512i __A, const unsigned int __imm)
{
return (__m512i) __builtin_ia32_psrawi512_mask ((__v32hi) __A, __imm,
(__v32hi)
#define _mm512_srli_epi16(A, B) \
((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
- (int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)-1))
+ (unsigned int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)-1))
#define _mm512_mask_srli_epi16(W, U, A, B) \
((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
- (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+ (unsigned int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
#define _mm512_maskz_srli_epi16(U, A, B) \
((__m512i) __builtin_ia32_psrlwi512_mask ((__v32hi)(__m512i)(A), \
(int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)(U)))
-#define _mm512_slli_epi16(X, C) \
- ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
- (__v32hi)(__m512i)_mm512_setzero_si512 (), \
+#define _mm512_slli_epi16(X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (), \
(__mmask32)-1))
-#define _mm512_mask_slli_epi16(W, U, X, C) \
- ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
- (__v32hi)(__m512i)(W),\
+#define _mm512_mask_slli_epi16(W, U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v32hi)(__m512i)(W), \
(__mmask32)(U)))
-#define _mm512_maskz_slli_epi16(U, X, C) \
- ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), (int)(C),\
- (__v32hi)(__m512i)_mm512_setzero_si512 (), \
+#define _mm512_maskz_slli_epi16(U, X, C) \
+ ((__m512i)__builtin_ia32_psllwi512_mask ((__v32hi)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v32hi)(__m512i)_mm512_setzero_si512 (), \
(__mmask32)(U)))
#define _mm512_shufflehi_epi16(A, B) \
#define _mm512_srai_epi16(A, B) \
((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
- (int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)-1))
+ (unsigned int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)-1))
#define _mm512_mask_srai_epi16(W, U, A, B) \
((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
- (int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
+ (unsigned int)(B), (__v32hi)(__m512i)(W), (__mmask32)(U)))
#define _mm512_maskz_srai_epi16(U, A, B) \
((__m512i) __builtin_ia32_psrawi512_mask ((__v32hi)(__m512i)(A), \
- (int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)(U)))
+ (unsigned int)(B), (__v32hi)_mm512_setzero_si512 (), (__mmask32)(U)))
#define _mm512_mask_blend_epi16(__U, __A, __W) \
((__m512i) __builtin_ia32_blendmw_512_mask ((__v32hi) (__A), \
(__mmask8) __U);
}
#else
-#define _mm512_slli_epi64(X, C) \
- ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_undefined_epi32 (),\
+#define _mm512_slli_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_undefined_epi32 (), \
(__mmask8)-1))
-#define _mm512_mask_slli_epi64(W, U, X, C) \
- ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)(W),\
+#define _mm512_mask_slli_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)(W), \
(__mmask8)(U)))
-#define _mm512_maskz_slli_epi64(U, X, C) \
- ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_slli_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psllqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
(__mmask8)(U)))
#endif
(__mmask8) __U);
}
#else
-#define _mm512_srli_epi64(X, C) \
- ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_undefined_epi32 (),\
+#define _mm512_srli_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_undefined_epi32 (), \
(__mmask8)-1))
-#define _mm512_mask_srli_epi64(W, U, X, C) \
- ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)(W),\
+#define _mm512_mask_srli_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)(W), \
(__mmask8)(U)))
-#define _mm512_maskz_srli_epi64(U, X, C) \
- ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_srli_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psrlqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
(__mmask8)(U)))
#endif
(__mmask8) __U);
}
#else
-#define _mm512_srai_epi64(X, C) \
- ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_undefined_epi32 (),\
+#define _mm512_srai_epi64(X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_undefined_epi32 (), \
(__mmask8)-1))
-#define _mm512_mask_srai_epi64(W, U, X, C) \
- ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)(W),\
+#define _mm512_mask_srai_epi64(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)(W), \
(__mmask8)(U)))
-#define _mm512_maskz_srai_epi64(U, X, C) \
- ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), (int)(C),\
- (__v8di)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_srai_epi64(U, X, C) \
+ ((__m512i) __builtin_ia32_psraqi512_mask ((__v8di)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v8di)(__m512i)_mm512_setzero_si512 (), \
(__mmask8)(U)))
#endif
(__mmask16) __U);
}
#else
-#define _mm512_slli_epi32(X, C) \
- ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_undefined_epi32 (),\
+#define _mm512_slli_epi32(X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)_mm512_undefined_epi32 (), \
(__mmask16)-1))
-#define _mm512_mask_slli_epi32(W, U, X, C) \
- ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)(W),\
+#define _mm512_mask_slli_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)(W), \
(__mmask16)(U)))
-#define _mm512_maskz_slli_epi32(U, X, C) \
- ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_slli_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_pslldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)_mm512_setzero_si512 (), \
(__mmask16)(U)))
#endif
(__mmask16) __U);
}
#else
-#define _mm512_srli_epi32(X, C) \
- ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+#define _mm512_srli_epi32(X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
-#define _mm512_mask_srli_epi32(W, U, X, C) \
- ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)(W),\
+#define _mm512_mask_srli_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)(W), \
(__mmask16)(U)))
-#define _mm512_maskz_srli_epi32(U, X, C) \
- ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_srli_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_psrldi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)_mm512_setzero_si512 (), \
(__mmask16)(U)))
#endif
(__mmask16) __U);
}
#else
-#define _mm512_srai_epi32(X, C) \
- ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
+#define _mm512_srai_epi32(X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
(__v16si)(__m512i)_mm512_undefined_epi32 (),\
(__mmask16)-1))
-#define _mm512_mask_srai_epi32(W, U, X, C) \
- ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)(W),\
+#define _mm512_mask_srai_epi32(W, U, X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)(W), \
(__mmask16)(U)))
-#define _mm512_maskz_srai_epi32(U, X, C) \
- ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), (int)(C),\
- (__v16si)(__m512i)_mm512_setzero_si512 (),\
+#define _mm512_maskz_srai_epi32(U, X, C) \
+ ((__m512i) __builtin_ia32_psradi512_mask ((__v16si)(__m512i)(X), \
+ (unsigned int)(C), \
+ (__v16si)(__m512i)_mm512_setzero_si512 (), \
(__mmask16)(U)))
#endif
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srai_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
(__v16hi) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const int __imm)
+_mm256_maskz_srai_epi16 (__mmask16 __U, __m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrawi256_mask ((__v16hi) __A, __imm,
(__v16hi)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srai_epi16 (__m128i __W, __mmask8 __U, __m128i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
(__v8hi) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const int __imm)
+_mm_maskz_srai_epi16 (__mmask8 __U, __m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrawi128_mask ((__v8hi) __A, __imm,
(__v8hi)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_slli_epi16 (__m256i __W, __mmask16 __U, __m256i __A,
- int __B)
+ unsigned int __B)
{
return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
(__v16hi) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, int __B)
+_mm256_maskz_slli_epi16 (__mmask16 __U, __m256i __A, unsigned int __B)
{
return (__m256i) __builtin_ia32_psllwi256_mask ((__v16hi) __A, __B,
(__v16hi)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+_mm_mask_slli_epi16 (__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
(__v8hi) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
+_mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_psllwi128_mask ((__v8hi) __A, __B,
(__v8hi)
#define _mm_maskz_srli_epi16(U, A, B) \
((__m128i) __builtin_ia32_psrlwi128_mask ((__v8hi)(__m128i)(A), \
- (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+ (int)(B), (__v8hi)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_srai_epi16(W, U, A, B) \
((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
- (int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
+ (unsigned int)(B), (__v16hi)(__m256i)(W), (__mmask16)(U)))
#define _mm256_maskz_srai_epi16(U, A, B) \
((__m256i) __builtin_ia32_psrawi256_mask ((__v16hi)(__m256i)(A), \
- (int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
+ (unsigned int)(B), (__v16hi)_mm256_setzero_si256 (), (__mmask16)(U)))
#define _mm_mask_srai_epi16(W, U, A, B) \
((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
- (int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v8hi)(__m128i)(W), (__mmask8)(U)))
#define _mm_maskz_srai_epi16(U, A, B) \
((__m128i) __builtin_ia32_psrawi128_mask ((__v8hi)(__m128i)(A), \
- (int)(B), (__v8hi)_mm_setzero_si128(), (__mmask8)(U)))
+ (unsigned int)(B), (__v8hi)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_shufflehi_epi16(W, U, A, B) \
((__m256i) __builtin_ia32_pshufhw256_mask ((__v16hi)(__m256i)(A), (int)(B), \
(__v2di)(__m128i)_mm_setzero_si128 (), \
(__mmask16)(U)))
-#define _mm_mask_slli_epi16(W, U, X, C) \
- ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
- (__v8hi)(__m128i)(W),\
+#define _mm_mask_slli_epi16(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v8hi)(__m128i)(W), \
(__mmask8)(U)))
-#define _mm_maskz_slli_epi16(U, X, C) \
- ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), (int)(C),\
- (__v8hi)(__m128i)_mm_setzero_si128 (),\
+#define _mm_maskz_slli_epi16(U, X, C) \
+ ((__m128i)__builtin_ia32_psllwi128_mask ((__v8hi)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v8hi)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
#define _mm256_dbsad_epu8(X, Y, C) \
(__v16hi)(__m256i)_mm256_setzero_si256(),\
(__mmask16)-1))
-#define _mm256_mask_slli_epi16(W, U, X, C) \
- ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
- (__v16hi)(__m256i)(W),\
+#define _mm256_mask_slli_epi16(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v16hi)(__m256i)(W), \
(__mmask16)(U)))
-#define _mm256_maskz_slli_epi16(U, X, C) \
- ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), (int)(C),\
- (__v16hi)(__m256i)_mm256_setzero_si256 (),\
+#define _mm256_maskz_slli_epi16(U, X, C) \
+ ((__m256i)__builtin_ia32_psllwi256_mask ((__v16hi)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v16hi)(__m256i)_mm256_setzero_si256 (), \
(__mmask16)(U)))
#define _mm256_mask_dbsad_epu8(W, U, X, Y, C) \
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
(__v8si) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+_mm256_maskz_srli_epi32 (__mmask8 __U, __m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrldi256_mask ((__v8si) __A, __imm,
(__v8si)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srli_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
(__v4si) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+_mm_maskz_srli_epi32 (__mmask8 __U, __m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrldi128_mask ((__v4si) __A, __imm,
(__v4si)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
(__v4di) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+_mm256_maskz_srli_epi64 (__mmask8 __U, __m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psrlqi256_mask ((__v4di) __A, __imm,
(__v4di)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srli_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
(__v2di) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+_mm_maskz_srli_epi64 (__mmask8 __U, __m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psrlqi128_mask ((__v2di) __A, __imm,
(__v2di)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srai_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
(__v8si) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const int __imm)
+_mm256_maskz_srai_epi32 (__mmask8 __U, __m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psradi256_mask ((__v8si) __A, __imm,
(__v8si)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srai_epi32 (__m128i __W, __mmask8 __U, __m128i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
(__v4si) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const int __imm)
+_mm_maskz_srai_epi32 (__mmask8 __U, __m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psradi128_mask ((__v4si) __A, __imm,
(__v4si)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_srai_epi64 (__m256i __A, const int __imm)
+_mm256_srai_epi64 (__m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
(__v4di)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_srai_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
(__v4di) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const int __imm)
+_mm256_maskz_srai_epi64 (__mmask8 __U, __m256i __A, const unsigned int __imm)
{
return (__m256i) __builtin_ia32_psraqi256_mask ((__v4di) __A, __imm,
(__v4di)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_srai_epi64 (__m128i __A, const int __imm)
+_mm_srai_epi64 (__m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
(__v2di)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm_mask_srai_epi64 (__m128i __W, __mmask8 __U, __m128i __A,
- const int __imm)
+ const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
(__v2di) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const int __imm)
+_mm_maskz_srai_epi64 (__mmask8 __U, __m128i __A, const unsigned int __imm)
{
return (__m128i) __builtin_ia32_psraqi128_mask ((__v2di) __A, __imm,
(__v2di)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+_mm_mask_slli_epi32 (__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
(__v4si) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, int __B)
+_mm_maskz_slli_epi32 (__mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_pslldi128_mask ((__v4si) __A, __B,
(__v4si)
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, int __B)
+_mm_mask_slli_epi64 (__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
(__v2di) __W,
extern __inline __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, int __B)
+_mm_maskz_slli_epi64 (__mmask8 __U, __m128i __A, unsigned int __B)
{
return (__m128i) __builtin_ia32_psllqi128_mask ((__v2di) __A, __B,
(__v2di)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_slli_epi32 (__m256i __W, __mmask8 __U, __m256i __A,
- int __B)
+ unsigned int __B)
{
return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
(__v8si) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, int __B)
+_mm256_maskz_slli_epi32 (__mmask8 __U, __m256i __A, unsigned int __B)
{
return (__m256i) __builtin_ia32_pslldi256_mask ((__v8si) __A, __B,
(__v8si)
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm256_mask_slli_epi64 (__m256i __W, __mmask8 __U, __m256i __A,
- int __B)
+ unsigned int __B)
{
return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
(__v4di) __W,
extern __inline __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, int __B)
+_mm256_maskz_slli_epi64 (__mmask8 __U, __m256i __A, unsigned int __B)
{
return (__m256i) __builtin_ia32_psllqi256_mask ((__v4di) __A, __B,
(__v4di)
#define _mm256_mask_srli_epi32(W, U, A, B) \
((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
#define _mm256_maskz_srli_epi32(U, A, B) \
((__m256i) __builtin_ia32_psrldi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_mask_srli_epi32(W, U, A, B) \
((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
#define _mm_maskz_srli_epi32(U, A, B) \
((__m128i) __builtin_ia32_psrldi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_srli_epi64(W, U, A, B) \
((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
- (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
#define _mm256_maskz_srli_epi64(U, A, B) \
((__m256i) __builtin_ia32_psrlqi256_mask ((__v4di)(__m256i)(A), \
- (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_mask_srli_epi64(W, U, A, B) \
((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
#define _mm_maskz_srli_epi64(U, A, B) \
((__m128i) __builtin_ia32_psrlqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
-#define _mm256_mask_slli_epi32(W, U, X, C) \
- ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
- (__v8si)(__m256i)(W), \
+#define _mm256_mask_slli_epi32(W, U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v8si)(__m256i)(W), \
(__mmask8)(U)))
-#define _mm256_maskz_slli_epi32(U, X, C) \
- ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), (int)(C),\
- (__v8si)(__m256i)_mm256_setzero_si256 (), \
+#define _mm256_maskz_slli_epi32(U, X, C) \
+ ((__m256i)__builtin_ia32_pslldi256_mask ((__v8si)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v8si)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)(U)))
-#define _mm256_mask_slli_epi64(W, U, X, C) \
- ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
- (__v4di)(__m256i)(W), \
+#define _mm256_mask_slli_epi64(W, U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v4di)(__m256i)(W), \
(__mmask8)(U)))
-#define _mm256_maskz_slli_epi64(U, X, C) \
- ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), (int)(C),\
- (__v4di)(__m256i)_mm256_setzero_si256 (), \
+#define _mm256_maskz_slli_epi64(U, X, C) \
+ ((__m256i)__builtin_ia32_psllqi256_mask ((__v4di)(__m256i)(X), \
+ (unsigned int)(C), \
+ (__v4di)(__m256i)_mm256_setzero_si256 (), \
(__mmask8)(U)))
-#define _mm_mask_slli_epi32(W, U, X, C) \
- ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
- (__v4si)(__m128i)(W),\
+#define _mm_mask_slli_epi32(W, U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v4si)(__m128i)(W), \
(__mmask8)(U)))
-#define _mm_maskz_slli_epi32(U, X, C) \
- ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), (int)(C),\
- (__v4si)(__m128i)_mm_setzero_si128 (),\
+#define _mm_maskz_slli_epi32(U, X, C) \
+ ((__m128i)__builtin_ia32_pslldi128_mask ((__v4si)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v4si)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
-#define _mm_mask_slli_epi64(W, U, X, C) \
- ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
- (__v2di)(__m128i)(W),\
+#define _mm_mask_slli_epi64(W, U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v2di)(__m128i)(W), \
(__mmask8)(U)))
-#define _mm_maskz_slli_epi64(U, X, C) \
- ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), (int)(C),\
- (__v2di)(__m128i)_mm_setzero_si128 (),\
+#define _mm_maskz_slli_epi64(U, X, C) \
+ ((__m128i)__builtin_ia32_psllqi128_mask ((__v2di)(__m128i)(X), \
+ (unsigned int)(C), \
+ (__v2di)(__m128i)_mm_setzero_si128 (), \
(__mmask8)(U)))
#define _mm256_ternarylogic_epi64(A, B, C, I) \
#define _mm256_mask_srai_epi32(W, U, A, B) \
((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v8si)(__m256i)(W), (__mmask8)(U)))
#define _mm256_maskz_srai_epi32(U, A, B) \
((__m256i) __builtin_ia32_psradi256_mask ((__v8si)(__m256i)(A), \
- (int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v8si)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_mask_srai_epi32(W, U, A, B) \
((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v4si)(__m128i)(W), (__mmask8)(U)))
#define _mm_maskz_srai_epi32(U, A, B) \
((__m128i) __builtin_ia32_psradi128_mask ((__v4si)(__m128i)(A), \
- (int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v4si)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_srai_epi64(A, B) \
((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
- (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
+ (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)-1))
#define _mm256_mask_srai_epi64(W, U, A, B) \
((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
- (int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v4di)(__m256i)(W), (__mmask8)(U)))
#define _mm256_maskz_srai_epi64(U, A, B) \
((__m256i) __builtin_ia32_psraqi256_mask ((__v4di)(__m256i)(A), \
- (int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v4di)_mm256_setzero_si256 (), (__mmask8)(U)))
#define _mm_srai_epi64(A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
+ (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)-1))
#define _mm_mask_srai_epi64(W, U, A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
+ (unsigned int)(B), (__v2di)(__m128i)(W), (__mmask8)(U)))
#define _mm_maskz_srai_epi64(U, A, B) \
((__m128i) __builtin_ia32_psraqi128_mask ((__v2di)(__m128i)(A), \
- (int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
+ (unsigned int)(B), (__v2di)_mm_setzero_si128 (), (__mmask8)(U)))
#define _mm256_mask_permutex_pd(W, U, A, B) \
((__m256d) __builtin_ia32_permdf256_mask ((__v4df)(__m256d)(A), \
--- /dev/null
+/* PR target/109173 */
+/* { dg-do compile } */
+/* { dg-options "-c -Wsign-conversion -Werror -mavx512bw -mavx512vl -O2" } */
+
+/* Each intrinsic below receives its shift count straight from a function
+   returning unsigned int.  Because the file is built with
+   -Wsign-conversion -Werror, it compiles only if no call needs an
+   implicit unsigned-to-signed conversion of that count.  */
+
+#include <immintrin.h>
+
+extern unsigned int bar (void);
+
+void
+foo (void)
+{
+  __m128i a1, w1;
+  __m256i a2, w2;
+  __mmask8 u;
+  /* The 256-bit epi16 forms are declared with a __mmask16 mask.  */
+  __mmask16 u16;
+
+  _mm256_mask_srli_epi32(w2, u, a2, bar());
+  _mm256_maskz_srli_epi32(u, a2, bar());
+  _mm_mask_srli_epi32(w1, u, a1, bar());
+  _mm_maskz_srli_epi32(u, a1, bar());
+
+  _mm256_mask_srli_epi64(w2, u, a2, bar());
+  _mm256_maskz_srli_epi64(u, a2, bar());
+  _mm_mask_srli_epi64(w1, u, a1, bar());
+  _mm_maskz_srli_epi64(u, a1, bar());
+
+  _mm256_mask_srai_epi32(w2, u, a2, bar());
+  _mm256_maskz_srai_epi32(u, a2, bar());
+  _mm_mask_srai_epi32(w1, u, a1, bar());
+  _mm_maskz_srai_epi32(u, a1, bar());
+
+  _mm256_srai_epi64(a2, bar());
+  _mm256_mask_srai_epi64(w2, u, a2, bar());
+  _mm256_maskz_srai_epi64(u, a2, bar());
+  _mm_srai_epi64(a1, bar());
+  _mm_mask_srai_epi64(w1, u, a1, bar());
+  _mm_maskz_srai_epi64(u, a1, bar());
+
+  _mm256_mask_slli_epi32(w2, u, a2, bar());
+  _mm256_maskz_slli_epi32(u, a2, bar());
+  _mm_mask_slli_epi32(w1, u, a1, bar());
+  _mm_maskz_slli_epi32(u, a1, bar());
+
+  _mm256_mask_slli_epi64(w2, u, a2, bar());
+  _mm256_maskz_slli_epi64(u, a2, bar());
+  _mm_mask_slli_epi64(w1, u, a1, bar());
+  _mm_maskz_slli_epi64(u, a1, bar());
+
+  _mm256_mask_srai_epi16(w2, u16, a2, bar());
+  _mm256_maskz_srai_epi16(u16, a2, bar());
+  _mm_mask_srai_epi16(w1, u, a1, bar());
+  _mm_maskz_srai_epi16(u, a1, bar());
+
+  _mm256_mask_slli_epi16(w2, u16, a2, bar());
+  _mm256_maskz_slli_epi16(u16, a2, bar());
+  _mm_mask_slli_epi16(w1, u, a1, bar());
+  _mm_maskz_slli_epi16(u, a1, bar());
+}
--- /dev/null
+/* PR target/109174 */
+/* { dg-do compile } */
+/* { dg-options "-c -Wsign-conversion -Werror -mavx512bw -O2" } */
+
+/* Each 512-bit shift intrinsic below receives its count straight from a
+   function returning unsigned int.  Because the file is built with
+   -Wsign-conversion -Werror, it compiles only if no call needs an
+   implicit unsigned-to-signed conversion of that count.
+
+   NOTE(review): _mm512_maskz_srli_epi16 is not exercised.  Its macro form
+   still casts the count to int, so presumably it keeps a signed count on
+   purpose; confirm before adding it here.  */
+
+#include <immintrin.h>
+
+extern unsigned int bar (void);
+
+void
+foo (void)
+{
+  __m512i a, w;
+  __mmask32 u1;
+  __mmask16 u2;
+  __mmask8 u3;
+
+  _mm512_slli_epi64(a, bar());
+  _mm512_mask_slli_epi64(w, u3, a, bar());
+  _mm512_maskz_slli_epi64(u3, a, bar());
+  _mm512_slli_epi32(a, bar());
+  _mm512_mask_slli_epi32(w, u2, a, bar());
+  _mm512_maskz_slli_epi32(u2, a, bar());
+  _mm512_slli_epi16(a, bar());
+  _mm512_mask_slli_epi16(w, u1, a, bar());
+  _mm512_maskz_slli_epi16(u1, a, bar());
+
+  _mm512_srai_epi64(a, bar());
+  _mm512_mask_srai_epi64(w, u3, a, bar());
+  _mm512_maskz_srai_epi64(u3, a, bar());
+  _mm512_srai_epi32(a, bar());
+  _mm512_mask_srai_epi32(w, u2, a, bar());
+  _mm512_maskz_srai_epi32(u2, a, bar());
+  _mm512_srai_epi16(a, bar());
+  _mm512_mask_srai_epi16(w, u1, a, bar());
+  _mm512_maskz_srai_epi16(u1, a, bar());
+
+  _mm512_srli_epi64(a, bar());
+  _mm512_mask_srli_epi64(w, u3, a, bar());
+  _mm512_maskz_srli_epi64(u3, a, bar());
+  _mm512_srli_epi32(a, bar());
+  _mm512_mask_srli_epi32(w, u2, a, bar());
+  _mm512_maskz_srli_epi32(u2, a, bar());
+  _mm512_srli_epi16(a, bar());
+  _mm512_mask_srli_epi16(w, u1, a, bar());
+}