git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Correct mask operand for AVX mask load/store.
author H.J. Lu <hongjiu.lu@intel.com>
Mon, 17 Jan 2011 13:54:43 +0000 (13:54 +0000)
committer H.J. Lu <hjl@gcc.gnu.org>
Mon, 17 Jan 2011 13:54:43 +0000 (05:54 -0800)
gcc/

2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>

Backport from mainline
2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>

PR target/47318
* config/i386/avxintrin.h (_mm_maskload_pd): Change mask to
__m128i.
(_mm_maskstore_pd): Likewise.
(_mm_maskload_ps): Likewise.
(_mm_maskstore_ps): Likewise.
(_mm256_maskload_pd): Change mask to __m256i.
(_mm256_maskstore_pd): Likewise.
(_mm256_maskload_ps): Likewise.
(_mm256_maskstore_ps): Likewise.

* config/i386/i386-builtin-types.def: Updated.
(ix86_expand_special_args_builtin): Likewise.

* config/i386/i386.c (ix86_special_builtin_type): Remove
V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF,
V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF,
VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF,
VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF.
Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI,
V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI,
VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF,
VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF.
(bdesc_special_args): Update
__builtin_ia32_maskloadpd, __builtin_ia32_maskloadps,
__builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256,
__builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps,
__builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256.
(ix86_init_mmx_sse_builtins): Updated.

* config/i386/sse.md (avx_maskload<ssemodesuffix><avxmodesuffix>):
Use <avxpermvecmode> on mask register.
(avx_maskstore<ssemodesuffix><avxmodesuffix>): Likewise.

gcc/testsuite/

2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>

Backport from mainline
2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>

PR target/47318
* gcc.target/i386/avx-vmaskmovpd-1.c: New.
* gcc.target/i386/avx-vmaskmovpd-2.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-1.c: Likewise.
* gcc.target/i386/avx-vmaskmovps-2.c: Likewise.

* gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask
as __m256i.
* gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise.
* gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise.
* gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise.

From-SVN: r168904

13 files changed:
gcc/ChangeLog
gcc/config/i386/avxintrin.h
gcc/config/i386/i386.c
gcc/config/i386/sse.md
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c
gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c
gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c
gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c

index c9358d4e9ff7ba96145e49ecd65fcec5edf03221..412ceccfbff4864aaf83bd58af8c923b23d5f765 100644 (file)
@@ -1,3 +1,42 @@
+2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>
+
+       Backport from mainline
+       2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/47318
+       * config/i386/avxintrin.h (_mm_maskload_pd): Change mask to
+       __m128i.
+       (_mm_maskstore_pd): Likewise.
+       (_mm_maskload_ps): Likewise.
+       (_mm_maskstore_ps): Likewise.
+       (_mm256_maskload_pd): Change mask to __m256i.
+       (_mm256_maskstore_pd): Likewise.
+       (_mm256_maskload_ps): Likewise.
+       (_mm256_maskstore_ps): Likewise.
+
+       * config/i386/i386-builtin-types.def: Updated.
+       (ix86_expand_special_args_builtin): Likewise.
+
+       * config/i386/i386.c (ix86_special_builtin_type): Remove
+       V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF,
+       V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF,
+       VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF,
+       VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF.
+       Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI,
+       V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI,
+       VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF,
+       VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF.
+       (bdesc_special_args): Update
+       __builtin_ia32_maskloadpd, __builtin_ia32_maskloadps,
+       __builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256,
+       __builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps,
+       __builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256.
+       (ix86_init_mmx_sse_builtins): Updated.
+
+       * config/i386/sse.md (avx_maskload<ssemodesuffix><avxmodesuffix>):
+       Use <avxpermvecmode> on mask register.
+       (avx_maskstore<ssemodesuffix><avxmodesuffix>): Likewise.
+
 2011-01-16  Jakub Jelinek  <jakub@redhat.com>
 
        Backport from mainline
index 26925fd7fbbba6f6956c4260059103b855651dc7..70bfce1a27462287ebddb038e9117050c1522da1 100644 (file)
@@ -890,55 +890,55 @@ _mm256_storeu_si256 (__m256i *__P, __m256i __A)
 }
 
 extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_pd (double const *__P, __m128d __M)
+_mm_maskload_pd (double const *__P, __m128i __M)
 {
   return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P,
-                                             (__v2df)__M);
+                                             (__v2di)__M);
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A)
+_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A)
 {
-  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A);
+  __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A);
 }
 
 extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_pd (double const *__P, __m256d __M)
+_mm256_maskload_pd (double const *__P, __m256i __M)
 {
   return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P,
-                                                (__v4df)__M);
+                                                (__v4di)__M);
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A)
+_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A)
 {
-  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A);
+  __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A);
 }
 
 extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskload_ps (float const *__P, __m128 __M)
+_mm_maskload_ps (float const *__P, __m128i __M)
 {
   return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P,
-                                            (__v4sf)__M);
+                                            (__v4si)__M);
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A)
+_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A)
 {
-  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A);
+  __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskload_ps (float const *__P, __m256 __M)
+_mm256_maskload_ps (float const *__P, __m256i __M)
 {
   return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P,
-                                               (__v8sf)__M);
+                                               (__v8si)__M);
 }
 
 extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A)
+_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A)
 {
-  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A);
+  __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A);
 }
 
 extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
index 463f169a73cd2ff65cd65c34ede73ec21c1c5cd8..4e6f6895dcc4fb9d798bcbf0dcc715a34c39319f 100644 (file)
@@ -20806,12 +20806,12 @@ enum ix86_special_builtin_type
   V4DF_FTYPE_PCDOUBLE,
   V4SF_FTYPE_PCFLOAT,
   V2DF_FTYPE_PCDOUBLE,
-  V8SF_FTYPE_PCV8SF_V8SF,
-  V4DF_FTYPE_PCV4DF_V4DF,
+  V8SF_FTYPE_PCV8SF_V8SI,
+  V4DF_FTYPE_PCV4DF_V4DI,
   V4SF_FTYPE_V4SF_PCV2SF,
-  V4SF_FTYPE_PCV4SF_V4SF,
+  V4SF_FTYPE_PCV4SF_V4SI,
   V2DF_FTYPE_V2DF_PCDOUBLE,
-  V2DF_FTYPE_PCV2DF_V2DF,
+  V2DF_FTYPE_PCV2DF_V2DI,
   V2DI_FTYPE_PV2DI,
   VOID_FTYPE_PV2SF_V4SF,
   VOID_FTYPE_PV4DI_V4DI,
@@ -20824,10 +20824,10 @@ enum ix86_special_builtin_type
   VOID_FTYPE_PDOUBLE_V2DF,
   VOID_FTYPE_PDI_DI,
   VOID_FTYPE_PINT_INT,
-  VOID_FTYPE_PV8SF_V8SF_V8SF,
-  VOID_FTYPE_PV4DF_V4DF_V4DF,
-  VOID_FTYPE_PV4SF_V4SF_V4SF,
-  VOID_FTYPE_PV2DF_V2DF_V2DF
+  VOID_FTYPE_PV8SF_V8SI_V8SF,
+  VOID_FTYPE_PV4DF_V4DI_V4DF,
+  VOID_FTYPE_PV4SF_V4SI_V4SF,
+  VOID_FTYPE_PV2DF_V2DI_V2DF
 };
 
 /* Builtin types */
@@ -21058,14 +21058,14 @@ static const struct builtin_description bdesc_special_args[] =
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
   { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
 
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
-  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF },
+  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF },
 };
 
 /* Builtins with variable number of arguments.  */
@@ -22620,40 +22620,40 @@ ix86_init_mmx_sse_builtins (void)
     = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
   tree pcv4df_type_node
     = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
-  tree v8sf_ftype_pcv8sf_v8sf
+  tree v8sf_ftype_pcv8sf_v8si
     = build_function_type_list (V8SF_type_node,
-                               pcv8sf_type_node, V8SF_type_node,
+                               pcv8sf_type_node, V8SI_type_node,
                                NULL_TREE);
-  tree v4df_ftype_pcv4df_v4df
+  tree v4df_ftype_pcv4df_v4di
     = build_function_type_list (V4DF_type_node,
-                               pcv4df_type_node, V4DF_type_node,
+                               pcv4df_type_node, V4DI_type_node,
                                NULL_TREE);
-  tree v4sf_ftype_pcv4sf_v4sf
+  tree v4sf_ftype_pcv4sf_v4si
     = build_function_type_list (V4SF_type_node,
-                               pcv4sf_type_node, V4SF_type_node,
+                               pcv4sf_type_node, V4SI_type_node,
                                NULL_TREE);
-  tree v2df_ftype_pcv2df_v2df
+  tree v2df_ftype_pcv2df_v2di
     = build_function_type_list (V2DF_type_node,
-                               pcv2df_type_node, V2DF_type_node,
+                               pcv2df_type_node, V2DI_type_node,
                                NULL_TREE);
-  tree void_ftype_pv8sf_v8sf_v8sf
+  tree void_ftype_pv8sf_v8si_v8sf
     = build_function_type_list (void_type_node,
-                               pv8sf_type_node, V8SF_type_node,
+                               pv8sf_type_node, V8SI_type_node,
                                V8SF_type_node,
                                NULL_TREE);
-  tree void_ftype_pv4df_v4df_v4df
+  tree void_ftype_pv4df_v4di_v4df
     = build_function_type_list (void_type_node,
-                               pv4df_type_node, V4DF_type_node,
+                               pv4df_type_node, V4DI_type_node,
                                V4DF_type_node,
                                NULL_TREE);
-  tree void_ftype_pv4sf_v4sf_v4sf
+  tree void_ftype_pv4sf_v4si_v4sf
     = build_function_type_list (void_type_node,
-                               pv4sf_type_node, V4SF_type_node,
+                               pv4sf_type_node, V4SI_type_node,
                                V4SF_type_node,
                                NULL_TREE);
-  tree void_ftype_pv2df_v2df_v2df
+  tree void_ftype_pv2df_v2di_v2df
     = build_function_type_list (void_type_node,
-                               pv2df_type_node, V2DF_type_node,
+                               pv2df_type_node, V2DI_type_node,
                                V2DF_type_node,
                                NULL_TREE);
   tree v4df_ftype_v2df
@@ -22759,23 +22759,23 @@ ix86_init_mmx_sse_builtins (void)
        case V2DF_FTYPE_PCDOUBLE:
          type = v2df_ftype_pcdouble;
          break;
-       case V8SF_FTYPE_PCV8SF_V8SF:
-         type = v8sf_ftype_pcv8sf_v8sf;
+       case V8SF_FTYPE_PCV8SF_V8SI:
+         type = v8sf_ftype_pcv8sf_v8si;
          break;
-       case V4DF_FTYPE_PCV4DF_V4DF:
-         type = v4df_ftype_pcv4df_v4df;
+       case V4DF_FTYPE_PCV4DF_V4DI:
+         type = v4df_ftype_pcv4df_v4di;
          break;
        case V4SF_FTYPE_V4SF_PCV2SF:
          type = v4sf_ftype_v4sf_pcv2sf;
          break;
-       case V4SF_FTYPE_PCV4SF_V4SF:
-         type = v4sf_ftype_pcv4sf_v4sf;
+       case V4SF_FTYPE_PCV4SF_V4SI:
+         type = v4sf_ftype_pcv4sf_v4si;
          break;
        case V2DF_FTYPE_V2DF_PCDOUBLE:
          type = v2df_ftype_v2df_pcdouble;
          break;
-       case V2DF_FTYPE_PCV2DF_V2DF:
-         type = v2df_ftype_pcv2df_v2df;
+       case V2DF_FTYPE_PCV2DF_V2DI:
+         type = v2df_ftype_pcv2df_v2di;
          break;
        case VOID_FTYPE_PV2SF_V4SF:
          type = void_ftype_pv2sf_v4sf;
@@ -22810,17 +22810,17 @@ ix86_init_mmx_sse_builtins (void)
        case VOID_FTYPE_PINT_INT:
          type = void_ftype_pint_int;
          break;
-       case VOID_FTYPE_PV8SF_V8SF_V8SF:
-         type = void_ftype_pv8sf_v8sf_v8sf;
+       case VOID_FTYPE_PV8SF_V8SI_V8SF:
+         type = void_ftype_pv8sf_v8si_v8sf;
          break;
-       case VOID_FTYPE_PV4DF_V4DF_V4DF:
-         type = void_ftype_pv4df_v4df_v4df;
+       case VOID_FTYPE_PV4DF_V4DI_V4DF:
+         type = void_ftype_pv4df_v4di_v4df;
          break;
-       case VOID_FTYPE_PV4SF_V4SF_V4SF:
-         type = void_ftype_pv4sf_v4sf_v4sf;
+       case VOID_FTYPE_PV4SF_V4SI_V4SF:
+         type = void_ftype_pv4sf_v4si_v4sf;
          break;
-       case VOID_FTYPE_PV2DF_V2DF_V2DF:
-         type = void_ftype_pv2df_v2df_v2df;
+       case VOID_FTYPE_PV2DF_V2DI_V2DF:
+         type = void_ftype_pv2df_v2di_v2df;
          break;
        default:
          gcc_unreachable ();
@@ -24650,18 +24650,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
       klass = load;
       memory = 1;
       break;
-    case V8SF_FTYPE_PCV8SF_V8SF:
-    case V4DF_FTYPE_PCV4DF_V4DF:
-    case V4SF_FTYPE_PCV4SF_V4SF:
-    case V2DF_FTYPE_PCV2DF_V2DF:
+    case V8SF_FTYPE_PCV8SF_V8SI:
+    case V4DF_FTYPE_PCV4DF_V4DI:
+    case V4SF_FTYPE_PCV4SF_V4SI:
+    case V2DF_FTYPE_PCV2DF_V2DI:
       nargs = 2;
       klass = load;
       memory = 0;
       break;
-    case VOID_FTYPE_PV8SF_V8SF_V8SF:
-    case VOID_FTYPE_PV4DF_V4DF_V4DF:
-    case VOID_FTYPE_PV4SF_V4SF_V4SF:
-    case VOID_FTYPE_PV2DF_V2DF_V2DF:
+    case VOID_FTYPE_PV8SF_V8SI_V8SF:
+    case VOID_FTYPE_PV4DF_V4DI_V4DF:
+    case VOID_FTYPE_PV4SF_V4SI_V4SF:
+    case VOID_FTYPE_PV2DF_V2DI_V2DF:
       nargs = 2;
       klass = store;
       /* Reserve memory operand for target.  */
index e5eba2ad6ff1ed96823288f57aaf6f75b6a30c97..73199b15e331d02ee22a4878a9249336e116324a 100644 (file)
   [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
        (unspec:AVXMODEF2P
          [(match_operand:AVXMODEF2P 1 "memory_operand" "m")
-          (match_operand:AVXMODEF2P 2 "register_operand" "x")
+          (match_operand:<avxpermvecmode> 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKLOAD))]
   "TARGET_AVX"
 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
   [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
        (unspec:AVXMODEF2P
-         [(match_operand:AVXMODEF2P 1 "register_operand" "x")
+         [(match_operand:<avxpermvecmode> 1 "register_operand" "x")
           (match_operand:AVXMODEF2P 2 "register_operand" "x")
           (match_dup 0)]
          UNSPEC_MASKSTORE))]
index 7923c4042d9ec671a81e23d4e0af36459960c4f6..c5c80e5af2e6b8db2b3dbf6b7d8810da21c83b8b 100644 (file)
@@ -1,3 +1,20 @@
+2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>
+
+       Backport from mainline
+       2011-01-17  H.J. Lu  <hongjiu.lu@intel.com>
+
+       PR target/47318
+       * gcc.target/i386/avx-vmaskmovpd-1.c: New.
+       * gcc.target/i386/avx-vmaskmovpd-2.c: Likewise.
+       * gcc.target/i386/avx-vmaskmovps-1.c: Likewise.
+       * gcc.target/i386/avx-vmaskmovps-2.c: Likewise.
+
+       * gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask
+       as __m256i.
+       * gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise.
+       * gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise.
+       * gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise.
+
 2011-01-16  Jakub Jelinek  <jakub@redhat.com>
 
        Backport from mainline
diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c
new file mode 100644 (file)
index 0000000..6204ebd
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+#ifndef MASK
+#define MASK 7
+#endif
+
+#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63)
+
+void static
+avx_test (void)
+{
+  int i;
+  long long m[2] = {mask_v(0), mask_v(1)};
+  double s[2] = {1.1, 2.2};
+  union128d u;
+  union128i_q mask;
+  double e[2] = {0.0};
+
+  mask.x = _mm_loadu_si128 ((__m128i *)m);
+  u.x = _mm_maskload_pd (s, mask.x);
+
+  for (i = 0 ; i < 2; i++) 
+    e[i] = m[i] ? s[i] : 0;
+   
+  if (check_union128d (u, e))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c
new file mode 100644 (file)
index 0000000..6bc6207
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+#ifndef MASK
+#define MASK 6
+#endif
+
+#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63)
+
+void static
+avx_test (void)
+{
+  int i;
+  long long m[2] = {mask_v(0), mask_v(1)};
+  double s[2] = {1.1, 2.2};
+  double e[2] = {0.0};
+  double d[2] = {0.0};
+  union128d src;
+  union128i_q mask;
+  
+  src.x = _mm_loadu_pd (s);
+  mask.x = _mm_loadu_si128 ((__m128i *)m);
+  _mm_maskstore_pd (d, mask.x, src.x);
+
+  for (i = 0 ; i < 2; i++) 
+    e[i] = m[i] ? s[i] : 0;
+   
+  if (checkVd (d, e, 2))
+    abort ();
+}
index f29826bbbd4c7221b2b0bef7d499be33094ae7b9..e591c05c8e87b2949126b6ac9fb8ce0881f2be82 100644 (file)
@@ -14,12 +14,13 @@ void static
 avx_test (void)
 {
   int i;
-  long long m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
+  long long m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
   double s[4] = {1.1, 2.2, 3.3, 4.4};
-  union256d u, mask;
+  union256d u;
+  union256i_q mask;
   double e [4] = {0.0};
 
-  mask.x = _mm256_loadu_pd ((double*)m);
+  mask.x = _mm256_loadu_si256 ((__m256i *)m);
   u.x = _mm256_maskload_pd (s, mask.x);
 
   for (i = 0 ; i < 4; i++) 
index 1e574b6993bae15fd446de3413bc44531af9177c..5df2f94ee5f9d67b13c1f50996c96af1eef56062 100644 (file)
@@ -18,10 +18,11 @@ avx_test (void)
   double s[4] = {1.1, 2.2, 3.3, 4.4};
   double e [4] = {0.0};
   double d [4] = {0.0};
-  union256d src, mask;
+  union256d src;
+  union256i_q mask;
   
   src.x = _mm256_loadu_pd (s);
-  mask.x = _mm256_loadu_pd ((double*)m);
+  mask.x = _mm256_loadu_si256 ((__m256i *)m);
   _mm256_maskstore_pd (d, mask.x, src.x);
 
   for (i = 0 ; i < 4; i++) 
diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c
new file mode 100644 (file)
index 0000000..360a04d
--- /dev/null
@@ -0,0 +1,31 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+#ifndef MASK
+#define MASK 134
+#endif
+
+#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31)
+
+void static
+avx_test (void)
+{
+  int i;
+  int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
+  float s[4] = {1,2,3,4};
+  union128 u;
+  union128i_d mask;
+  float e[4] = {0.0};
+
+  mask.x = _mm_loadu_si128 ((__m128i *)m);
+  u.x = _mm_maskload_ps (s, mask.x);
+
+  for (i = 0 ; i < 4; i++) 
+    e[i] = m[i] ? s[i] : 0;
+   
+  if (check_union128 (u, e))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c
new file mode 100644 (file)
index 0000000..3dde965
--- /dev/null
@@ -0,0 +1,33 @@
+/* { dg-do run } */
+/* { dg-require-effective-target avx } */
+/* { dg-options "-O2 -mavx" } */
+
+#include "avx-check.h"
+
+#ifndef MASK
+#define MASK 214
+#endif
+
+#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31)
+
+void static
+avx_test (void)
+{
+  int i;
+  int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)};
+  float s[4] = {1,2,3,4};
+  union128 src;
+  union128i_d mask;
+  float e[4] = {0.0};
+  float d[4] = {0.0};
+
+  src.x = _mm_loadu_ps (s);
+  mask.x = _mm_loadu_si128 ((__m128i *)m);
+  _mm_maskstore_ps (d, mask.x, src.x);
+
+  for (i = 0 ; i < 4; i++) 
+    e[i] = m[i] ? s[i] : 0;
+   
+  if (checkVf (d, e, 4))
+    abort ();
+}
index 9e6c7f91d914d90c86cee2abba484c6f1a5edf22..647ce3f6e61b42a04cfc007f8a03005f65bac209 100644 (file)
@@ -16,10 +16,11 @@ avx_test (void)
   int i;
   int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)};
   float s[8] = {1,2,3,4,5,6,7,8};
-  union256 u, mask;
+  union256 u;
+  union256i_d mask;
   float e [8] = {0.0};
 
-  mask.x = _mm256_loadu_ps ((float*)m);
+  mask.x = _mm256_loadu_si256 ((__m256i *)m);
   u.x = _mm256_maskload_ps (s, mask.x);
 
   for (i = 0 ; i < 8; i++) 
index 90d91a06a6d02834cb97aeb2d9c06b88fbc01960..016904d4638892e2bff605a564a1ac28b5c33687 100644 (file)
@@ -16,12 +16,13 @@ avx_test (void)
   int i;
   int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)};
   float s[8] = {1,2,3,4,5,6,7,8};
-  union256 src, mask;
+  union256 src;
+  union256i_d mask;
   float e [8] = {0.0};
   float d [8] = {0.0};
 
   src.x = _mm256_loadu_ps (s);
-  mask.x = _mm256_loadu_ps ((float *)m);
+  mask.x = _mm256_loadu_si256 ((__m256i *)m);
   _mm256_maskstore_ps (d, mask.x, src.x);
 
   for (i = 0 ; i < 8; i++)