From: H.J. Lu Date: Mon, 17 Jan 2011 13:54:43 +0000 (+0000) Subject: Correct mask operand for AVX mask load/store. X-Git-Tag: releases/gcc-4.4.6~166 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=3ef292260394a36b99ffe88daf82303b9af9c553;p=thirdparty%2Fgcc.git Correct mask operand for AVX mask load/store. gcc/ 2011-01-17 H.J. Lu Backport from mainline 2011-01-17 H.J. Lu PR target/47318 * config/i386/avxintrin.h (_mm_maskload_pd): Change mask to __m128i. (_mm_maskstore_pd): Likewise. (_mm_maskload_ps): Likewise. (_mm_maskstore_ps): Likewise. (_mm256_maskload_pd): Change mask to __m256i. (_mm256_maskstore_pd): Likewise. (_mm256_maskload_ps): Likewise. (_mm256_maskstore_ps): Likewise. * config/i386/i386-builtin-types.def: Updated. (ix86_expand_special_args_builtin): Likewise. * config/i386/i386.c (ix86_special_builtin_type): Remove V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF, V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF, VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF. Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI, V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI, VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF, VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF. (bdesc_special_args): Update __builtin_ia32_maskloadpd, __builtin_ia32_maskloadps, __builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256, __builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps, __builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256. (ix86_init_mmx_sse_builtins): Updated. * config/i386/sse.md (avx_maskload): Use <avxpermvecmode> on mask register. (avx_maskstore): Likewise. gcc/testsuite/ 2011-01-17 H.J. Lu Backport from mainline 2011-01-17 H.J. Lu PR target/47318 * gcc.target/i386/avx-vmaskmovpd-1.c: New. * gcc.target/i386/avx-vmaskmovpd-2.c: Likewise. * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. * gcc.target/i386/avx-vmaskmovps-2.c: Likewise. 
* gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask as __m256i. * gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise. * gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise. * gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise. From-SVN: r168904 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c9358d4e9ff7..412ceccfbff4 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,42 @@ +2011-01-17 H.J. Lu + + Backport from mainline + 2011-01-17 H.J. Lu + + PR target/47318 + * config/i386/avxintrin.h (_mm_maskload_pd): Change mask to + __m128i. + (_mm_maskstore_pd): Likewise. + (_mm_maskload_ps): Likewise. + (_mm_maskstore_ps): Likewise. + (_mm256_maskload_pd): Change mask to __m256i. + (_mm256_maskstore_pd): Likewise. + (_mm256_maskload_ps): Likewise. + (_mm256_maskstore_ps): Likewise. + + * config/i386/i386-builtin-types.def: Updated. + (ix86_expand_special_args_builtin): Likewise. + + * config/i386/i386.c (ix86_special_builtin_type): Remove + V8SF_FTYPE_PCV8SF_V8SF, V4DF_FTYPE_PCV4DF_V4DF, + V4SF_FTYPE_PCV4SF_V4SF, V2DF_FTYPE_PCV2DF_V2DF, + VOID_FTYPE_PV8SF_V8SF_V8SF, VOID_FTYPE_PV4DF_V4DF_V4DF, + VOID_FTYPE_PV4SF_V4SF_V4SF and VOID_FTYPE_PV2DF_V2DF_V2DF. + Add V8SF_FTYPE_PCV8SF_V8SI, V4DF_FTYPE_PCV4DF_V4DI, + V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_PCV2DF_V2DI, + VOID_FTYPE_PV8SF_V8SI_V8SF, VOID_FTYPE_PV4DF_V4DI_V4DF, + VOID_FTYPE_PV4SF_V4SI_V4SF and VOID_FTYPE_PV2DF_V2DI_V2DF. + (bdesc_special_args): Update + __builtin_ia32_maskloadpd, __builtin_ia32_maskloadps, + __builtin_ia32_maskloadpd256, __builtin_ia32_maskloadps256, + __builtin_ia32_maskstorepd, __builtin_ia32_maskstoreps, + __builtin_ia32_maskstorepd256 and __builtin_ia32_maskstoreps256. + (ix86_init_mmx_sse_builtins): Updated. + + * config/i386/sse.md (avx_maskload): + Use <avxpermvecmode> on mask register. + (avx_maskstore): Likewise. 
+ 2011-01-16 Jakub Jelinek Backport from mainline diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h index 26925fd7fbbb..70bfce1a2746 100644 --- a/gcc/config/i386/avxintrin.h +++ b/gcc/config/i386/avxintrin.h @@ -890,55 +890,55 @@ _mm256_storeu_si256 (__m256i *__P, __m256i __A) } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskload_pd (double const *__P, __m128d __M) +_mm_maskload_pd (double const *__P, __m128i __M) { return (__m128d) __builtin_ia32_maskloadpd ((const __v2df *)__P, - (__v2df)__M); + (__v2di)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskstore_pd (double *__P, __m128d __M, __m128d __A) +_mm_maskstore_pd (double *__P, __m128i __M, __m128d __A) { - __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2df)__M, (__v2df)__A); + __builtin_ia32_maskstorepd ((__v2df *)__P, (__v2di)__M, (__v2df)__A); } extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskload_pd (double const *__P, __m256d __M) +_mm256_maskload_pd (double const *__P, __m256i __M) { return (__m256d) __builtin_ia32_maskloadpd256 ((const __v4df *)__P, - (__v4df)__M); + (__v4di)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskstore_pd (double *__P, __m256d __M, __m256d __A) +_mm256_maskstore_pd (double *__P, __m256i __M, __m256d __A) { - __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4df)__M, (__v4df)__A); + __builtin_ia32_maskstorepd256 ((__v4df *)__P, (__v4di)__M, (__v4df)__A); } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm_maskload_ps (float const *__P, __m128 __M) +_mm_maskload_ps (float const *__P, __m128i __M) { return (__m128) __builtin_ia32_maskloadps ((const __v4sf *)__P, - (__v4sf)__M); + (__v4si)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
-_mm_maskstore_ps (float *__P, __m128 __M, __m128 __A) +_mm_maskstore_ps (float *__P, __m128i __M, __m128 __A) { - __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4sf)__M, (__v4sf)__A); + __builtin_ia32_maskstoreps ((__v4sf *)__P, (__v4si)__M, (__v4sf)__A); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskload_ps (float const *__P, __m256 __M) +_mm256_maskload_ps (float const *__P, __m256i __M) { return (__m256) __builtin_ia32_maskloadps256 ((const __v8sf *)__P, - (__v8sf)__M); + (__v8si)__M); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_maskstore_ps (float *__P, __m256 __M, __m256 __A) +_mm256_maskstore_ps (float *__P, __m256i __M, __m256 __A) { - __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8sf)__M, (__v8sf)__A); + __builtin_ia32_maskstoreps256 ((__v8sf *)__P, (__v8si)__M, (__v8sf)__A); } extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 463f169a73cd..4e6f6895dcc4 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -20806,12 +20806,12 @@ enum ix86_special_builtin_type V4DF_FTYPE_PCDOUBLE, V4SF_FTYPE_PCFLOAT, V2DF_FTYPE_PCDOUBLE, - V8SF_FTYPE_PCV8SF_V8SF, - V4DF_FTYPE_PCV4DF_V4DF, + V8SF_FTYPE_PCV8SF_V8SI, + V4DF_FTYPE_PCV4DF_V4DI, V4SF_FTYPE_V4SF_PCV2SF, - V4SF_FTYPE_PCV4SF_V4SF, + V4SF_FTYPE_PCV4SF_V4SI, V2DF_FTYPE_V2DF_PCDOUBLE, - V2DF_FTYPE_PCV2DF_V2DF, + V2DF_FTYPE_PCV2DF_V2DI, V2DI_FTYPE_PV2DI, VOID_FTYPE_PV2SF_V4SF, VOID_FTYPE_PV4DI_V4DI, @@ -20824,10 +20824,10 @@ enum ix86_special_builtin_type VOID_FTYPE_PDOUBLE_V2DF, VOID_FTYPE_PDI_DI, VOID_FTYPE_PINT_INT, - VOID_FTYPE_PV8SF_V8SF_V8SF, - VOID_FTYPE_PV4DF_V4DF_V4DF, - VOID_FTYPE_PV4SF_V4SF_V4SF, - VOID_FTYPE_PV2DF_V2DF_V2DF + VOID_FTYPE_PV8SF_V8SI_V8SF, + VOID_FTYPE_PV4DF_V4DI_V4DF, + VOID_FTYPE_PV4SF_V4SI_V4SF, + VOID_FTYPE_PV2DF_V2DI_V2DF }; /* Builtin types */ @@ -21058,14 
+21058,14 @@ static const struct builtin_description bdesc_special_args[] = { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, - { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, 
(int) V4DF_FTYPE_PCV4DF_V4DI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SI }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DI_V2DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SI_V4SF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DI_V4DF }, + { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SI_V8SF }, }; /* Builtins with variable number of arguments. */ @@ -22620,40 +22620,40 @@ ix86_init_mmx_sse_builtins (void) = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0)); tree pcv4df_type_node = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0)); - tree v8sf_ftype_pcv8sf_v8sf + tree v8sf_ftype_pcv8sf_v8si = build_function_type_list (V8SF_type_node, - pcv8sf_type_node, V8SF_type_node, + pcv8sf_type_node, V8SI_type_node, NULL_TREE); - tree v4df_ftype_pcv4df_v4df + tree v4df_ftype_pcv4df_v4di = build_function_type_list (V4DF_type_node, - pcv4df_type_node, V4DF_type_node, + pcv4df_type_node, V4DI_type_node, NULL_TREE); - tree v4sf_ftype_pcv4sf_v4sf + tree v4sf_ftype_pcv4sf_v4si = build_function_type_list (V4SF_type_node, - pcv4sf_type_node, V4SF_type_node, + pcv4sf_type_node, V4SI_type_node, NULL_TREE); - tree v2df_ftype_pcv2df_v2df + tree v2df_ftype_pcv2df_v2di = build_function_type_list (V2DF_type_node, - pcv2df_type_node, V2DF_type_node, + pcv2df_type_node, V2DI_type_node, NULL_TREE); - tree void_ftype_pv8sf_v8sf_v8sf + tree void_ftype_pv8sf_v8si_v8sf = build_function_type_list (void_type_node, - pv8sf_type_node, V8SF_type_node, + pv8sf_type_node, 
V8SI_type_node, V8SF_type_node, NULL_TREE); - tree void_ftype_pv4df_v4df_v4df + tree void_ftype_pv4df_v4di_v4df = build_function_type_list (void_type_node, - pv4df_type_node, V4DF_type_node, + pv4df_type_node, V4DI_type_node, V4DF_type_node, NULL_TREE); - tree void_ftype_pv4sf_v4sf_v4sf + tree void_ftype_pv4sf_v4si_v4sf = build_function_type_list (void_type_node, - pv4sf_type_node, V4SF_type_node, + pv4sf_type_node, V4SI_type_node, V4SF_type_node, NULL_TREE); - tree void_ftype_pv2df_v2df_v2df + tree void_ftype_pv2df_v2di_v2df = build_function_type_list (void_type_node, - pv2df_type_node, V2DF_type_node, + pv2df_type_node, V2DI_type_node, V2DF_type_node, NULL_TREE); tree v4df_ftype_v2df @@ -22759,23 +22759,23 @@ ix86_init_mmx_sse_builtins (void) case V2DF_FTYPE_PCDOUBLE: type = v2df_ftype_pcdouble; break; - case V8SF_FTYPE_PCV8SF_V8SF: - type = v8sf_ftype_pcv8sf_v8sf; + case V8SF_FTYPE_PCV8SF_V8SI: + type = v8sf_ftype_pcv8sf_v8si; break; - case V4DF_FTYPE_PCV4DF_V4DF: - type = v4df_ftype_pcv4df_v4df; + case V4DF_FTYPE_PCV4DF_V4DI: + type = v4df_ftype_pcv4df_v4di; break; case V4SF_FTYPE_V4SF_PCV2SF: type = v4sf_ftype_v4sf_pcv2sf; break; - case V4SF_FTYPE_PCV4SF_V4SF: - type = v4sf_ftype_pcv4sf_v4sf; + case V4SF_FTYPE_PCV4SF_V4SI: + type = v4sf_ftype_pcv4sf_v4si; break; case V2DF_FTYPE_V2DF_PCDOUBLE: type = v2df_ftype_v2df_pcdouble; break; - case V2DF_FTYPE_PCV2DF_V2DF: - type = v2df_ftype_pcv2df_v2df; + case V2DF_FTYPE_PCV2DF_V2DI: + type = v2df_ftype_pcv2df_v2di; break; case VOID_FTYPE_PV2SF_V4SF: type = void_ftype_pv2sf_v4sf; @@ -22810,17 +22810,17 @@ ix86_init_mmx_sse_builtins (void) case VOID_FTYPE_PINT_INT: type = void_ftype_pint_int; break; - case VOID_FTYPE_PV8SF_V8SF_V8SF: - type = void_ftype_pv8sf_v8sf_v8sf; + case VOID_FTYPE_PV8SF_V8SI_V8SF: + type = void_ftype_pv8sf_v8si_v8sf; break; - case VOID_FTYPE_PV4DF_V4DF_V4DF: - type = void_ftype_pv4df_v4df_v4df; + case VOID_FTYPE_PV4DF_V4DI_V4DF: + type = void_ftype_pv4df_v4di_v4df; break; - case 
VOID_FTYPE_PV4SF_V4SF_V4SF: - type = void_ftype_pv4sf_v4sf_v4sf; + case VOID_FTYPE_PV4SF_V4SI_V4SF: + type = void_ftype_pv4sf_v4si_v4sf; break; - case VOID_FTYPE_PV2DF_V2DF_V2DF: - type = void_ftype_pv2df_v2df_v2df; + case VOID_FTYPE_PV2DF_V2DI_V2DF: + type = void_ftype_pv2df_v2di_v2df; break; default: gcc_unreachable (); @@ -24650,18 +24650,18 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, klass = load; memory = 1; break; - case V8SF_FTYPE_PCV8SF_V8SF: - case V4DF_FTYPE_PCV4DF_V4DF: - case V4SF_FTYPE_PCV4SF_V4SF: - case V2DF_FTYPE_PCV2DF_V2DF: + case V8SF_FTYPE_PCV8SF_V8SI: + case V4DF_FTYPE_PCV4DF_V4DI: + case V4SF_FTYPE_PCV4SF_V4SI: + case V2DF_FTYPE_PCV2DF_V2DI: nargs = 2; klass = load; memory = 0; break; - case VOID_FTYPE_PV8SF_V8SF_V8SF: - case VOID_FTYPE_PV4DF_V4DF_V4DF: - case VOID_FTYPE_PV4SF_V4SF_V4SF: - case VOID_FTYPE_PV2DF_V2DF_V2DF: + case VOID_FTYPE_PV8SF_V8SI_V8SF: + case VOID_FTYPE_PV4DF_V4DI_V4DF: + case VOID_FTYPE_PV4SF_V4SI_V4SF: + case VOID_FTYPE_PV2DF_V2DI_V2DF: nargs = 2; klass = store; /* Reserve memory operand for target. 
*/ diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index e5eba2ad6ff1..73199b15e331 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -11657,7 +11657,7 @@ [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") (unspec:AVXMODEF2P [(match_operand:AVXMODEF2P 1 "memory_operand" "m") - (match_operand:AVXMODEF2P 2 "register_operand" "x") + (match_operand:<avxpermvecmode> 2 "register_operand" "x") (match_dup 0)] UNSPEC_MASKLOAD))] "TARGET_AVX" @@ -11669,7 +11669,7 @@ (define_insn "avx_maskstorep" [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") (unspec:AVXMODEF2P - [(match_operand:AVXMODEF2P 1 "register_operand" "x") + [(match_operand:<avxpermvecmode> 1 "register_operand" "x") (match_operand:AVXMODEF2P 2 "register_operand" "x") (match_dup 0)] UNSPEC_MASKSTORE))] diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7923c4042d9e..c5c80e5af2e6 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,20 @@ +2011-01-17 H.J. Lu + + Backport from mainline + 2011-01-17 H.J. Lu + + PR target/47318 + * gcc.target/i386/avx-vmaskmovpd-1.c: New. + * gcc.target/i386/avx-vmaskmovpd-2.c: Likewise. + * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. + * gcc.target/i386/avx-vmaskmovps-1.c: Likewise. + + * gcc.target/i386/avx-vmaskmovpd-256-1.c (avx_test): Load mask + as __m256i. + * gcc.target/i386/avx-vmaskmovpd-256-2.c (avx_test): Likewise. + * gcc.target/i386/avx-vmaskmovps-256-1.c (avx_test): Likewise. + * gcc.target/i386/avx-vmaskmovps-256-2.c (avx_test): Likewise. 
+ 2011-01-16 Jakub Jelinek Backport from mainline diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c new file mode 100644 index 000000000000..6204ebd28720 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 7 +#endif + +#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63) + +void static +avx_test (void) +{ + int i; + long long m[2] = {mask_v(0), mask_v(1)}; + double s[2] = {1.1, 2.2}; + union128d u; + union128i_q mask; + double e[2] = {0.0}; + + mask.x = _mm_loadu_si128 ((__m128i *)m); + u.x = _mm_maskload_pd (s, mask.x); + + for (i = 0 ; i < 2; i++) + e[i] = m[i] ? s[i] : 0; + + if (check_union128d (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c new file mode 100644 index 000000000000..6bc620755f9a --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 6 +#endif + +#define mask_v(pos) (((MASK & (0x1ULL << (pos))) >> (pos)) << 63) + +void static +avx_test (void) +{ + int i; + long long m[2] = {mask_v(0), mask_v(1)}; + double s[2] = {1.1, 2.2}; + double e[2] = {0.0}; + double d[2] = {0.0}; + union128d src; + union128i_q mask; + + src.x = _mm_loadu_pd (s); + mask.x = _mm_loadu_si128 ((__m128i *)m); + _mm_maskstore_pd (d, mask.x, src.x); + + for (i = 0 ; i < 2; i++) + e[i] = m[i] ? 
s[i] : 0; + + if (checkVd (d, e, 2)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c index f29826bbbd4c..e591c05c8e87 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-1.c @@ -14,12 +14,13 @@ void static avx_test (void) { int i; - long long m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; + long long m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; double s[4] = {1.1, 2.2, 3.3, 4.4}; - union256d u, mask; + union256d u; + union256i_q mask; double e [4] = {0.0}; - mask.x = _mm256_loadu_pd ((double*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); u.x = _mm256_maskload_pd (s, mask.x); for (i = 0 ; i < 4; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c index 1e574b6993ba..5df2f94ee5f9 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovpd-256-2.c @@ -18,10 +18,11 @@ avx_test (void) double s[4] = {1.1, 2.2, 3.3, 4.4}; double e [4] = {0.0}; double d [4] = {0.0}; - union256d src, mask; + union256d src; + union256i_q mask; src.x = _mm256_loadu_pd (s); - mask.x = _mm256_loadu_pd ((double*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); _mm256_maskstore_pd (d, mask.x, src.x); for (i = 0 ; i < 4; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c new file mode 100644 index 000000000000..360a04dbaaa7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-1.c @@ -0,0 +1,31 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 134 +#endif + +#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31) + +void static +avx_test (void) +{ + int i; + int m[4] = {mask_v(0), mask_v(1), mask_v(2), 
mask_v(3)}; + float s[4] = {1,2,3,4}; + union128 u; + union128i_d mask; + float e[4] = {0.0}; + + mask.x = _mm_loadu_si128 ((__m128i *)m); + u.x = _mm_maskload_ps (s, mask.x); + + for (i = 0 ; i < 4; i++) + e[i] = m[i] ? s[i] : 0; + + if (check_union128 (u, e)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c new file mode 100644 index 000000000000..3dde96557c03 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-2.c @@ -0,0 +1,33 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-O2 -mavx" } */ + +#include "avx-check.h" + +#ifndef MASK +#define MASK 214 +#endif + +#define mask_v(pos) (((MASK & (0x1 << (pos))) >> (pos)) << 31) + +void static +avx_test (void) +{ + int i; + int m[4] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3)}; + float s[4] = {1,2,3,4}; + union128 src; + union128i_d mask; + float e[4] = {0.0}; + float d[4] = {0.0}; + + src.x = _mm_loadu_ps (s); + mask.x = _mm_loadu_si128 ((__m128i *)m); + _mm_maskstore_ps (d, mask.x, src.x); + + for (i = 0 ; i < 4; i++) + e[i] = m[i] ? 
s[i] : 0; + + if (checkVf (d, e, 4)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c index 9e6c7f91d914..647ce3f6e61b 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-1.c @@ -16,10 +16,11 @@ avx_test (void) int i; int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)}; float s[8] = {1,2,3,4,5,6,7,8}; - union256 u, mask; + union256 u; + union256i_d mask; float e [8] = {0.0}; - mask.x = _mm256_loadu_ps ((float*)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); u.x = _mm256_maskload_ps (s, mask.x); for (i = 0 ; i < 8; i++) diff --git a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c index 90d91a06a6d0..016904d46388 100644 --- a/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c +++ b/gcc/testsuite/gcc.target/i386/avx-vmaskmovps-256-2.c @@ -16,12 +16,13 @@ avx_test (void) int i; int m[8] = {mask_v(0), mask_v(1), mask_v(2), mask_v(3), mask_v(4), mask_v(5), mask_v(6), mask_v(7)}; float s[8] = {1,2,3,4,5,6,7,8}; - union256 src, mask; + union256 src; + union256i_d mask; float e [8] = {0.0}; float d [8] = {0.0}; src.x = _mm256_loadu_ps (s); - mask.x = _mm256_loadu_ps ((float *)m); + mask.x = _mm256_loadu_si256 ((__m256i *)m); _mm256_maskstore_ps (d, mask.x, src.x); for (i = 0 ; i < 8; i++)