]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
author Ilya Tocar <ilya.tocar@intel.com>
Thu, 20 Feb 2014 06:26:38 +0000 (06:26 +0000)
committer Kirill Yukhin <kyukhin@gcc.gnu.org>
Thu, 20 Feb 2014 06:26:38 +0000 (06:26 +0000)
gcc/
* config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
(_mm_rcp28_round_ss): Ditto.
(_mm_rsqrt28_round_sd): Ditto.
(_mm_rsqrt28_round_ss): Ditto.
* config/i386/avx512fintrin.h (_mm_rcp14_sd): Ditto.
(_mm_rcp14_ss): Ditto.
(_mm_rsqrt14_sd): Ditto.
(_mm_rsqrt14_ss): Ditto.
* config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as
the first input operand, get rid of match_dup.
(avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
attribute to sse.
(<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
Ditto.
(avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.
(<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
Set type attribute.
(avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate
operand as the first input operand, set type attribute.

gcc/testsuite/
* gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
* gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
* gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
* gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
* gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation.

Co-Authored-By: Kirill Yukhin <kirill.yukhin@intel.com>
From-SVN: r207932

gcc/ChangeLog
gcc/config/i386/avx512erintrin.h
gcc/config/i386/avx512fintrin.h
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx512er-vrcp28sd-2.c
gcc/testsuite/gcc.target/i386/avx512er-vrcp28ss-2.c
gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28sd-2.c
gcc/testsuite/gcc.target/i386/avx512er-vrsqrt28ss-2.c
gcc/testsuite/gcc.target/i386/avx512f-vrcp14sd-2.c
gcc/testsuite/gcc.target/i386/avx512f-vrcp14ss-2.c

index 8f84dbdd34a53cb0ae3b2acbe0e4f0484f2b2172..6748c0fc36a30b87995f83480352747060636bf3 100644 (file)
@@ -1,3 +1,27 @@
+2014-02-20  Ilya Tocar  <ilya.tocar@intel.com>
+            Kirill Yukhin  <kirill.yukhin@intel.com>
+
+       * config/i386/avx512erintrin.h (_mm_rcp28_round_sd): Swap operands.
+       (_mm_rcp28_round_ss): Ditto.
+       (_mm_rsqrt28_round_sd): Ditto.
+       (_mm_rsqrt28_round_ss): Ditto.
+       * config/i386/avx512fintrin.h (_mm_rcp14_sd): Ditto.
+       (_mm_rcp14_ss): Ditto.
+       (_mm_rsqrt14_sd): Ditto.
+       (_mm_rsqrt14_ss): Ditto.
+       * config/i386/sse.md (rsqrt14<mode>): Put nonimmediate operand as
+       the first input operand, get rid of match_dup.
+       (avx512er_exp2<mode><mask_name><round_saeonly_name>): Set type
+       attribute to sse.
+       (<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>):
+       Ditto.
+       (avx512er_vmrcp28<mode><round_saeonly_name>): Put nonimmediate
+       operand as the first input operand, set type attribute.
+       (<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>):
+       Set type attribute.
+       (avx512er_vmrsqrt28<mode><round_saeonly_name>): Put nonimmediate
+       operand as the first input operand, set type attribute.
+
 2014-02-19  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
 
        * config/rs6000/rs6000.c (vspltis_constant): Fix most significant
index 6fe05bc6608c818e7a42b3f08fd49c7bad634dab..f6870a5f7233998abd9a8562a217d7211093e061 100644 (file)
@@ -163,8 +163,8 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp28_round_sd (__m128d __A, __m128d __B, int __R)
 {
-  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __A,
-                                                (__v2df) __B,
+  return (__m128d) __builtin_ia32_rcp28sd_round ((__v2df) __B,
+                                                (__v2df) __A,
                                                 __R);
 }
 
@@ -172,8 +172,8 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp28_round_ss (__m128 __A, __m128 __B, int __R)
 {
-  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __A,
-                                               (__v4sf) __B,
+  return (__m128) __builtin_ia32_rcp28ss_round ((__v4sf) __B,
+                                               (__v4sf) __A,
                                                __R);
 }
 
@@ -237,8 +237,8 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt28_round_sd (__m128d __A, __m128d __B, int __R)
 {
-  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __A,
-                                                  (__v2df) __B,
+  return (__m128d) __builtin_ia32_rsqrt28sd_round ((__v2df) __B,
+                                                  (__v2df) __A,
                                                   __R);
 }
 
@@ -246,8 +246,8 @@ extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
 {
-  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __A,
-                                                 (__v4sf) __B,
+  return (__m128) __builtin_ia32_rsqrt28ss_round ((__v4sf) __B,
+                                                 (__v4sf) __A,
                                                  __R);
 }
 
@@ -375,16 +375,16 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
     _mm512_maskz_rsqrt28_round_ps(U, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rcp28_sd(A, B)     \
-    __builtin_ia32_rcp28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rcp28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rcp28_ss(A, B)     \
-    __builtin_ia32_rcp28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rcp28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rsqrt28_sd(A, B)   \
-    __builtin_ia32_rsqrt28sd_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rsqrt28sd_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #define _mm_rsqrt28_ss(A, B)   \
-    __builtin_ia32_rsqrt28ss_round(A, B, _MM_FROUND_CUR_DIRECTION)
+    __builtin_ia32_rsqrt28ss_round(B, A, _MM_FROUND_CUR_DIRECTION)
 
 #ifdef __DISABLE_AVX512ER__
 #undef __DISABLE_AVX512ER__
index b3a4f3a41ffc6549b9982cfd1b3caaa4fca6751a..65a49c5c1f379fdd33235f5d84d719b67ae6e85d 100644 (file)
@@ -1470,16 +1470,16 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp14_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __A,
-                                          (__v2df) __B);
+  return (__m128d) __builtin_ia32_rcp14sd ((__v2df) __B,
+                                          (__v2df) __A);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rcp14_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __A,
-                                         (__v4sf) __B);
+  return (__m128) __builtin_ia32_rcp14ss ((__v4sf) __B,
+                                         (__v4sf) __A);
 }
 
 extern __inline __m512d
@@ -1544,16 +1544,16 @@ extern __inline __m128d
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt14_sd (__m128d __A, __m128d __B)
 {
-  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __A,
-                                            (__v2df) __B);
+  return (__m128d) __builtin_ia32_rsqrt14sd ((__v2df) __B,
+                                            (__v2df) __A);
 }
 
 extern __inline __m128
 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
 _mm_rsqrt14_ss (__m128 __A, __m128 __B)
 {
-  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __A,
-                                           (__v4sf) __B);
+  return (__m128) __builtin_ia32_rsqrt14ss ((__v4sf) __B,
+                                           (__v4sf) __A);
 }
 
 #ifdef __OPTIMIZE__
index 5595767bd98bdbecf308acf0f23d0a59eabec463..487c917471690def5d4616c435d10281c5af5e28 100644 (file)
   [(set (match_operand:VF_128 0 "register_operand" "=v")
        (vec_merge:VF_128
          (unspec:VF_128
-           [(match_operand:VF_128 1 "register_operand" "v")
-            (match_operand:VF_128 2 "nonimmediate_operand" "vm")]
+           [(match_operand:VF_128 1 "nonimmediate_operand" "vm")]
            UNSPEC_RSQRT14)
-         (match_dup 1)
+         (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
   "TARGET_AVX512F"
-  "vrsqrt14<ssescalarmodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "vrsqrt14<ssescalarmodesuffix>\t{%1, %2, %0|%0, %2, %1}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
   "TARGET_AVX512ER"
   "vexp2<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
   "TARGET_AVX512ER"
   "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrcp28<mode><round_saeonly_name>"
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
   "TARGET_AVX512ER"
-  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
+  "vrcp28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
    (set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
   "TARGET_AVX512ER"
   "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
   [(set_attr "prefix" "evex")
+   (set_attr "type" "sse")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
          (match_operand:VF_128 2 "register_operand" "v")
          (const_int 1)))]
   "TARGET_AVX512ER"
-  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%2, %1, %0|%0, %1, %2<round_saeonly_op3>}"
+  "vrsqrt28<ssescalarmodesuffix>\t{<round_saeonly_op3>%1, %2, %0|%0, %2, %1<round_saeonly_op3>}"
   [(set_attr "length_immediate" "1")
+   (set_attr "type" "sse")
    (set_attr "prefix" "evex")
    (set_attr "mode" "<MODE>")])
 
index fd796d7179fd26d1e2436e308260c6ee38887860..5aad2d5649dfafdf33c2d4d3eddf1cd3d9a8e1df 100644 (file)
@@ -1,3 +1,13 @@
+2014-02-20  Ilya Tocar  <ilya.tocar@intel.com>
+           Kirill Yukhin  <kirill.yukhin@intel.com>
+
+       * gcc.target/i386/avx512er-vrcp28sd-2.c: Distinguish src1 and src2.
+       * gcc.target/i386/avx512er-vrcp28ss-2.c: Call correct intrinsic.
+       * gcc.target/i386/avx512er-vrsqrt28sd-2.c: Distinguish src1 and src2.
+       * gcc.target/i386/avx512er-vrsqrt28ss-2.c: Ditto.
+       * gcc.target/i386/avx512f-vrcp14sd-2.c: Fix reference calculation.
+       * gcc.target/i386/avx512f-vrcp14ss-2.c: Fix reference calculation.
+
 2014-02-19  Jakub Jelinek  <jakub@redhat.com>
 
        PR c/37743
index d30f08828b6414dffe6a9d6451168e01654b6a3d..889f990acfe712dffba444c42fd0b6da9888a429 100644 (file)
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 204179.345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rcp28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
index 499a9771e78443e05dc3d5c47c423ce3597103c5..3280879107e9e4a3c97af589b094cb23c323e0a0 100644 (file)
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179345.006 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / src.a[0];
+  res_ref[0] = 1.0 / src2.a[0];
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rcp28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
index 1537a5932ca9516d219df2f61ce82e4e278e066b..bd217e8228f3a8f2bf6bbe72c6382f4ee4aea3a4 100644 (file)
 void static
 avx512er_test (void)
 {
-  union128d src, res;
+  union128d src1, src2, res;
   double res_ref[2];
   int i;
   
   for (i = 0; i < 2; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 45 - 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_sd (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_sd (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVd (res.a, res_ref, 2))
     abort ();
index f88422ea5edd07d7b63e3fe8e2bf35c753fa536e..f7bfff5a50da25860195aaf8be60385774cda7de 100644 (file)
 void static
 avx512er_test (void)
 {
-  union128 src, res;
+  union128 src1, src2, res;
   float res_ref[4];
   int i;
   
   for (i = 0; i < 4; i++)
     {
-      src.a[i] = 179.345 - 6.5645 * i;
-      res_ref[i] = src.a[i];
+      src1.a[i] = 179.345 - 6.5645 * i;
+      src2.a[i] = 179221345 + 6.5645 * i;
+      res_ref[i] = src1.a[i];
     }
 
-  res_ref[0] = 1.0 / sqrt (src.a[0]);
+  res_ref[0] = 1.0 / sqrt (src2.a[0]);
 
-  res.x = _mm_rsqrt28_round_ss (src.x, src.x, _MM_FROUND_NO_EXC);
+  res.x = _mm_rsqrt28_round_ss (src1.x, src2.x, _MM_FROUND_NO_EXC);
 
   if (checkVf (res.a, res_ref, 4))
     abort ();
index 0c9211a9a632099dd167bf5925eea2c496559fa4..f94460036c04bae393b0a000ada5db66ee093aeb 100644 (file)
@@ -8,8 +8,8 @@
 static void
 compute_vrcp14sd (double *s1, double *s2, double *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
 }
 
 static void
index 3344dadb8ef05375fc488d1b5e550c0ff12c0684..7aca591bfcaf5faf430491d64931b31093c27d5d 100644 (file)
@@ -8,10 +8,10 @@
 static void
 compute_vrcp14ss (float *s1, float *s2, float *r)
 {
-  r[0] = 1.0 / s1[0];
-  r[1] = s2[1];
-  r[2] = s2[2];
-  r[3] = s2[3];
+  r[0] = 1.0 / s2[0];
+  r[1] = s1[1];
+  r[2] = s1[2];
+  r[3] = s1[3];
 }
 
 static void