]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AVX512F: Add missing macro for mask(z?)_scalef_s[sd] [PR 105339]
author Hongyu Wang <hongyu.wang@intel.com>
Fri, 22 Apr 2022 06:42:30 +0000 (14:42 +0800)
committer Hongyu Wang <hongyu.wang@intel.com>
Wed, 27 Apr 2022 05:12:24 +0000 (13:12 +0800)
Add the missing macros used at -O0 and adjust the macro format for the
scalef intrinsics.

gcc/ChangeLog:

PR target/105339
* config/i386/avx512fintrin.h (_mm512_scalef_round_pd):
Add parentheses for parameters and adjust format.
(_mm512_mask_scalef_round_pd): Ditto.
(_mm512_maskz_scalef_round_pd): Ditto.
(_mm512_scalef_round_ps): Ditto.
(_mm512_mask_scalef_round_ps): Ditto.
(_mm512_maskz_scalef_round_ps): Ditto.
(_mm_scalef_round_sd): Use _mm_undefined_pd.
(_mm_scalef_round_ss): Use _mm_undefined_ps.
(_mm_mask_scalef_round_sd): New macro.
(_mm_mask_scalef_round_ss): Ditto.
(_mm_maskz_scalef_round_sd): Ditto.
(_mm_maskz_scalef_round_ss): Ditto.

gcc/testsuite/ChangeLog:

PR target/105339
* gcc.target/i386/sse-14.c: Add tests for new macro.

(cherry picked from commit 3c940d42701707559fabe49be99296f60fbc43e7)

gcc/config/i386/avx512fintrin.h
gcc/testsuite/gcc.target/i386/sse-14.c

index 76a625f477e94808905eb1fbc807945b6bffbe93..741cbff92340bfafce0da4d6349b08d62ab0e5f7 100644 (file)
@@ -3237,31 +3237,67 @@ _mm_maskz_scalef_round_ss (__mmask8 __U, __m128 __A, __m128 __B, const int __R)
                                                      (__mmask8) __U, __R);
 }
 #else
-#define _mm512_scalef_round_pd(A, B, C)            \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_undefined_pd(), -1, C)
-
-#define _mm512_mask_scalef_round_pd(W, U, A, B, C) \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, W, U, C)
-
-#define _mm512_maskz_scalef_round_pd(U, A, B, C)   \
-    (__m512d)__builtin_ia32_scalefpd512_mask(A, B, (__v8df)_mm512_setzero_pd(), U, C)
+#define _mm512_scalef_round_pd(A, B, C)                                        \
+  ((__m512d)                                                           \
+   __builtin_ia32_scalefpd512_mask((A), (B),                           \
+                                  (__v8df) _mm512_undefined_pd(),      \
+                                  -1, (C)))
+
+#define _mm512_mask_scalef_round_pd(W, U, A, B, C)                     \
+  ((__m512d) __builtin_ia32_scalefpd512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_pd(U, A, B, C)                       \
+  ((__m512d)                                                           \
+   __builtin_ia32_scalefpd512_mask((A), (B),                           \
+                                  (__v8df) _mm512_setzero_pd(),        \
+                                  (U), (C)))
+
+#define _mm512_scalef_round_ps(A, B, C)                                        \
+  ((__m512)                                                            \
+   __builtin_ia32_scalefps512_mask((A), (B),                           \
+                                  (__v16sf) _mm512_undefined_ps(),     \
+                                  -1, (C)))
+
+#define _mm512_mask_scalef_round_ps(W, U, A, B, C)                     \
+  ((__m512) __builtin_ia32_scalefps512_mask((A), (B), (W), (U), (C)))
+
+#define _mm512_maskz_scalef_round_ps(U, A, B, C)                       \
+  ((__m512)                                                            \
+   __builtin_ia32_scalefps512_mask((A), (B),                           \
+                                  (__v16sf) _mm512_setzero_ps(),       \
+                                  (U), (C)))
+
+#define _mm_scalef_round_sd(A, B, C)                                   \
+  ((__m128d)                                                           \
+   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
+                                      (__v2df) _mm_undefined_pd (),    \
+                                      -1, (C)))
 
-#define _mm512_scalef_round_ps(A, B, C)            \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_undefined_ps(), -1, C)
+#define _mm_scalef_round_ss(A, B, C)                                   \
+  ((__m128)                                                            \
+   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
+                                      (__v4sf) _mm_undefined_ps (),    \
+                                      -1, (C)))
 
-#define _mm512_mask_scalef_round_ps(W, U, A, B, C) \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, W, U, C)
+#define _mm_mask_scalef_round_sd(W, U, A, B, C)                                \
+  ((__m128d)                                                           \
+   __builtin_ia32_scalefsd_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm512_maskz_scalef_round_ps(U, A, B, C)   \
-    (__m512)__builtin_ia32_scalefps512_mask(A, B, (__v16sf)_mm512_setzero_ps(), U, C)
+#define _mm_mask_scalef_round_ss(W, U, A, B, C)                                \
+  ((__m128)                                                            \
+   __builtin_ia32_scalefss_mask_round ((A), (B), (W), (U), (C)))
 
-#define _mm_scalef_round_sd(A, B, C)            \
-    (__m128d)__builtin_ia32_scalefsd_mask_round (A, B, \
-       (__v2df)_mm_setzero_pd (), -1, C)
+#define _mm_maskz_scalef_round_sd(U, A, B, C)                          \
+  ((__m128d)                                                           \
+   __builtin_ia32_scalefsd_mask_round ((A), (B),                       \
+                                      (__v2df) _mm_setzero_pd (),      \
+                                      (U), (C)))
 
-#define _mm_scalef_round_ss(A, B, C)            \
-    (__m128)__builtin_ia32_scalefss_mask_round (A, B, \
-       (__v4sf)_mm_setzero_ps (), -1, C)
+#define _mm_maskz_scalef_round_ss(U, A, B, C)                          \
+  ((__m128)                                                            \
+   __builtin_ia32_scalefss_mask_round ((A), (B),                       \
+                                      (__v4sf) _mm_setzero_ps (),      \
+                                      (U), (C)))
 #endif
 
 #ifdef __OPTIMIZE__
index 0d2b8b3cba085515b06aac07c70073f91e1616b1..683895e599ecdae66d1b727fe4270464fcd59650 100644 (file)
@@ -429,7 +429,9 @@ test_3 (_mm_maskz_mul_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_mul_round_ps, __m512, __mmask16, __m512, __m512, 9)
 test_3 (_mm_maskz_mul_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_scalef_round_pd, __m512d, __mmask8, __m512d, __m512d, 9)
+test_3 (_mm_maskz_scalef_round_sd, __m128d, __mmask8, __m128d, __m128d, 9)
 test_3 (_mm512_maskz_scalef_round_ps, __m512, __mmask16, __m512, __m512, 9)
+test_3 (_mm_maskz_scalef_round_ss, __m128, __mmask8, __m128, __m128, 9)
 test_3 (_mm512_maskz_shuffle_f32x4, __m512, __mmask16, __m512, __m512, 1)
 test_3 (_mm512_maskz_shuffle_f64x2, __m512d, __mmask8, __m512d, __m512d, 1)
 test_3 (_mm512_maskz_shuffle_i32x4, __m512i, __mmask16, __m512i, __m512i, 1)
@@ -543,7 +545,9 @@ test_4 (_mm_mask_mul_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_mul_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
 test_4 (_mm_mask_mul_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_scalef_round_pd, __m512d, __m512d, __mmask8, __m512d, __m512d, 9)
+test_4 (_mm_mask_scalef_round_sd, __m128d, __m128d, __mmask8, __m128d, __m128d, 9)
 test_4 (_mm512_mask_scalef_round_ps, __m512, __m512, __mmask16, __m512, __m512, 9)
+test_4 (_mm_mask_scalef_round_ss, __m128, __m128, __mmask8, __m128, __m128, 9)
 test_4 (_mm512_mask_shuffle_f32x4, __m512, __m512, __mmask16, __m512, __m512, 1)
 test_4 (_mm512_mask_shuffle_f64x2, __m512d, __m512d, __mmask8, __m512d, __m512d, 1)
 test_4 (_mm512_mask_shuffle_i32x4, __m512i, __m512i, __mmask16, __m512i, __m512i, 1)