From: Uros Bizjak Date: Thu, 12 Apr 2012 18:37:42 +0000 (+0200) Subject: re PR target/52932 (AVX2 intrinsic _mm256_permutevar8x32_ps has wrong parameter type) X-Git-Tag: misc/gccgo-go1_1_2~3531 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=2ff5ea2db9172bd09d3cef8e6fa57dfffabd96ff;p=thirdparty%2Fgcc.git re PR target/52932 (AVX2 intrinsic _mm256_permutevar8x32_ps has wrong parameter type) PR target/52932 * config/i386/avx2intrin.h (_mm256_permutevar8x32_ps): Change second argument type to __m256i. Update call to __builtin_ia32_permvarsf256. * config/i386/sse.md (UNSPEC_VPERMVAR): New. (UNSPEC_VPERMSI, UNSPEC_VPERMSF): Remove. (avx2_permvarv8sf, avx2_permvarv8si): Switch operands 1 and 2. (avx2_permvar): Macroize insn from avx2_permvarv8sf and avx2_permvarv8si using VI4F_256 mode iterator. * config/i386/i386.c (bdesc_args) <__builtin_ia32_permvarsf256>: Update builtin type to V8SF_FTYPE_V8SF_V8SI. (ix86_expand_vec_perm): Update calls to gen_avx2_permvarv8si and gen_avx2_permvarv8sf. (expand_vec_perm_pshufb): Ditto. testsuite/ChangeLog: PR target/52932 * gcc.target/i386/avx2-vpermps-1.c (avx2_test): Use __m256i type for second function argument. * gcc.target/i386/avx2-vpermps-2.c (init_permps): Update declaration. (calc_permps): Update declaration. Calculate result correctly. (avx2_test): Change src2 type to union256i_d. * gcc.target/i386/avx2-vpermd-2.c (calc_permd): Calculate result correctly. From-SVN: r186388 --- diff --git a/gcc/ChangeLog b/gcc/ChangeLog index fb291e74c4c1..9f1468314898 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,19 @@ +2012-04-12 Uros Bizjak + + PR target/52932 + * config/i386/avx2intrin.h (_mm256_permutevar8x32_ps): Change second + argument type to __m256i. Update call to __builtin_ia32_permvarsf256. + * config/i386/sse.md (UNSPEC_VPERMVAR): New. + (UNSPEC_VPERMSI, UNSPEC_VPERMSF): Remove. + (avx2_permvarv8sf, avx2_permvarv8si): Switch operands 1 and 2. 
+ (avx2_permvar): Macroize insn from avx2_permvarv8sf and + avx2_permvarv8si using VI4F_256 mode iterator. + * config/i386/i386.c (bdesc_args) <__builtin_ia32_permvarsf256>: + Update builtin type to V8SF_FTYPE_V8SF_V8SI. + (ix86_expand_vec_perm): Update calls to gen_avx2_permvarv8si and + gen_avx2_permvarv8sf. + (expand_vec_perm_pshufb): Ditto. + 2012-04-12 Michael Meissner PR target/52775 @@ -200,8 +216,7 @@ 2012-04-11 Richard Guenther PR middle-end/52918 - * except.c (sjlj_emit_dispatch_table): Properly update loop - structure. + * except.c (sjlj_emit_dispatch_table): Properly update loop structure. 2012-04-11 Nick Clifton @@ -426,8 +441,7 @@ * tree-pass.h (tree_lowering_passes): Remove. * tree-optimize.c (tree_lowering_passes): Remove. * cgraph.c (cgraph_add_new_function): Inline relevant parts - of tree_lowering_passes, avoid redundant call of early local - passes. + of tree_lowering_passes, avoid redundant call of early local passes. * cgraphunit.c (cgraph_lower_function): Fold into ... (cgraph_analyze_function): ... its single caller. Inline relevant parts of tree_lowering_passes. @@ -442,8 +456,8 @@ PR lto/52722 PR lto/51765 PR lto/52634 - * lto-cgraph.c (compute_ltrans_boundary): When alias is in the boundary, - add its target too. + * lto-cgraph.c (compute_ltrans_boundary): When alias is in the + boundary, add its target too. * lto.c (add_references_to_partition): Add also aliased nodes. (add_cgraph_node_to_partition, add_varpool_node_to_partition): Work on nodes, not functions/variables; @@ -564,8 +578,7 @@ 2012-04-05 Teresa Johnson H.J. Lu - * config/i386/i386.h (ix86_tune_indices): Add - X86_TUNE_LCP_STALL. + * config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_LCP_STALL. * config/i386/i386.md (move immediate to memory peephole2): Add cases for HImode move when LCP stall avoidance is needed. * config/i386/i386.c (initial_ix86_tune_features): Initialize @@ -642,8 +655,7 @@ 2012-04-04 Mike Stump * doc/rtl.texi (const_double): Document as sign-extending. 
- * expmed.c (expand_mult): Ensure we don't use shift - incorrectly. + * expmed.c (expand_mult): Ensure we don't use shift incorrectly. * emit-rtl.c (immed_double_int_const): Refine to state the value is signed. * simplify-rtx.c (mode_signbit_p): Add a fixme for wider than @@ -651,8 +663,7 @@ (simplify_const_unary_operation, UNSIGNED_FLOAT): Ensure no negative values are converted. Fix conversions bigger than HOST_BITS_PER_WIDE_INT. - (simplify_binary_operation_1): Ensure we don't use shift - incorrectly. + (simplify_binary_operation_1): Ensure we don't use shift incorrectly. (simplify_immed_subreg): Sign-extend CONST_DOUBLEs. * explow.c (plus_constant_mode): Add. (plus_constant): Implement with plus_constant_mode. @@ -661,8 +672,7 @@ 2012-04-04 Richard Guenther PR tree-optimization/52808 - * tracer.c (tail_duplicate): Do not tail-duplicate loop header - blocks. + * tracer.c (tail_duplicate): Do not tail-duplicate loop header blocks. * Makefile.in (tracer.o): Depend on $(CFGLOOP_H). 2012-04-04 Tristan Gingold @@ -692,8 +702,8 @@ * h8300/h8300.md: Generate 'rte' for monitor functions. Do not save EXR on stack for monitor function in case of H8S target when "-mno-exr" is passed. - * h8300/h8300-protos.h - (h8300_current_function_monitor_function_p): Add prototype. + * h8300/h8300-protos.h (h8300_current_function_monitor_function_p): + Add prototype. * doc/invoke.texi: Document H8S options. 
2012-04-03 Tristan Gingold diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h index 12ed05fe0298..cebd9a2fced3 100644 --- a/gcc/config/i386/avx2intrin.h +++ b/gcc/config/i386/avx2intrin.h @@ -1034,9 +1034,9 @@ _mm256_permute4x64_pd (__m256d __X, const int __M) extern __inline __m256 __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) -_mm256_permutevar8x32_ps (__m256 __X, __m256 __Y) +_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y) { - return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X,(__v8sf)__Y); + return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y); } #ifdef __OPTIMIZE__ diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index af4af7ce134b..abe3f1b96ca2 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -19937,7 +19937,7 @@ ix86_expand_vec_perm (rtx operands[]) vt = force_reg (maskmode, vt); mask = gen_lowpart (maskmode, mask); if (maskmode == V8SImode) - emit_insn (gen_avx2_permvarv8si (t1, vt, mask)); + emit_insn (gen_avx2_permvarv8si (t1, mask, vt)); else emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt)); @@ -19971,13 +19971,13 @@ ix86_expand_vec_perm (rtx operands[]) the high bits of the shuffle elements. No need for us to perform an AND ourselves. 
*/ if (one_operand_shuffle) - emit_insn (gen_avx2_permvarv8si (target, mask, op0)); + emit_insn (gen_avx2_permvarv8si (target, op0, mask)); else { t1 = gen_reg_rtx (V8SImode); t2 = gen_reg_rtx (V8SImode); - emit_insn (gen_avx2_permvarv8si (t1, mask, op0)); - emit_insn (gen_avx2_permvarv8si (t2, mask, op1)); + emit_insn (gen_avx2_permvarv8si (t1, op0, mask)); + emit_insn (gen_avx2_permvarv8si (t2, op1, mask)); goto merge_two; } return; @@ -19985,13 +19985,13 @@ ix86_expand_vec_perm (rtx operands[]) case V8SFmode: mask = gen_lowpart (V8SFmode, mask); if (one_operand_shuffle) - emit_insn (gen_avx2_permvarv8sf (target, mask, op0)); + emit_insn (gen_avx2_permvarv8sf (target, op0, mask)); else { t1 = gen_reg_rtx (V8SFmode); t2 = gen_reg_rtx (V8SFmode); - emit_insn (gen_avx2_permvarv8sf (t1, mask, op0)); - emit_insn (gen_avx2_permvarv8sf (t2, mask, op1)); + emit_insn (gen_avx2_permvarv8sf (t1, op0, mask)); + emit_insn (gen_avx2_permvarv8sf (t2, op1, mask)); goto merge_two; } return; @@ -20004,7 +20004,7 @@ ix86_expand_vec_perm (rtx operands[]) t2 = gen_reg_rtx (V8SImode); emit_insn (gen_avx_vec_concatv8si (t1, op0, op1)); emit_insn (gen_avx_vec_concatv8si (t2, mask, mask)); - emit_insn (gen_avx2_permvarv8si (t1, t2, t1)); + emit_insn (gen_avx2_permvarv8si (t1, t1, t2)); emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx)); return; @@ -20014,7 +20014,7 @@ ix86_expand_vec_perm (rtx operands[]) mask = gen_lowpart (V4SFmode, mask); emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1)); emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask)); - emit_insn (gen_avx2_permvarv8sf (t1, t2, t1)); + emit_insn (gen_avx2_permvarv8sf (t1, t1, t2)); emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx)); return; @@ -26948,8 +26948,8 @@ static const struct builtin_description bdesc_args[] = { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, { OPTION_MASK_ISA_AVX2, 
CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI }, + { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, - { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT }, { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT }, @@ -36126,9 +36126,9 @@ expand_vec_perm_pshufb (struct expand_vec_perm_d *d) else if (vmode == V32QImode) emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm)); else if (vmode == V8SFmode) - emit_insn (gen_avx2_permvarv8sf (target, vperm, op0)); + emit_insn (gen_avx2_permvarv8sf (target, op0, vperm)); else - emit_insn (gen_avx2_permvarv8si (target, vperm, op0)); + emit_insn (gen_avx2_permvarv8si (target, op0, vperm)); } else { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b63d774e43f7..ffa0729d7410 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -79,8 +79,7 @@ UNSPEC_VCVTPS2PH ;; For AVX2 support - UNSPEC_VPERMSI - UNSPEC_VPERMSF + UNSPEC_VPERMVAR UNSPEC_VPERMTI UNSPEC_GATHER UNSPEC_VSIBADDR @@ -11901,26 +11900,14 @@ (set_attr "prefix" "vex") (set_attr "mode" "")]) -(define_insn 
"avx2_permvarv8si" - [(set (match_operand:V8SI 0 "register_operand" "=x") - (unspec:V8SI - [(match_operand:V8SI 1 "register_operand" "x") - (match_operand:V8SI 2 "nonimmediate_operand" "xm")] - UNSPEC_VPERMSI))] - "TARGET_AVX2" - "vpermd\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "type" "sselog") - (set_attr "prefix" "vex") - (set_attr "mode" "OI")]) - -(define_insn "avx2_permvarv8sf" - [(set (match_operand:V8SF 0 "register_operand" "=x") - (unspec:V8SF - [(match_operand:V8SF 1 "register_operand" "x") - (match_operand:V8SF 2 "nonimmediate_operand" "xm")] - UNSPEC_VPERMSF))] +(define_insn "avx2_permvar<mode>" + [(set (match_operand:VI4F_256 0 "register_operand" "=x") + (unspec:VI4F_256 + [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm") + (match_operand:V8SI 2 "register_operand" "x")] + UNSPEC_VPERMVAR))] "TARGET_AVX2" - "vpermps\t{%2, %1, %0|%0, %1, %2}" + "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}" [(set_attr "type" "sselog") (set_attr "prefix" "vex") (set_attr "mode" "OI")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 7a5f47f013f3..d3f14645f1aa 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,14 @@ +2012-04-12 Uros Bizjak + + PR target/52932 + * gcc.target/i386/avx2-vpermps-1.c (avx2_test): Use __m256i type for + second function argument. + * gcc.target/i386/avx2-vpermps-2.c (init_permps): Update declaration. + (calc_permps): Update declaration. Calculate result correctly. + (avx2_test): Change src2 type to union256i_d. + * gcc.target/i386/avx2-vpermd-2.c (calc_permd): Calculate result + correctly. + 2012-04-12 Michael Meissner PR target/52775 @@ -212,7 +223,7 @@ PR lto/52722 PR lto/51765 - PR lto/52634 + PR lto/52634 * gcc.dg/lto/pr52634_1.c: New testcase. * gcc.dg/lto/pr52634_0.c: New testcase. @@ -571,7 +582,7 @@ int32plus. * gcc.dg/torture/pr48124-4.c: Ditto: * gcc.dg/torture/pr52530.c: Use long instead of int if int=16. 
- + 2012-03-20 Jason Merrill PR c++/52510 @@ -666,11 +677,11 @@ 2012-03-15 Janne Blomqvist - PR libfortran/52434 - PR libfortran/48878 - PR libfortran/38199 - * gfortran.dg/edit_real_1.f90: Don't assume roundTiesToAway. - * gfortran.dg/round_1.f03: Likewise. + PR libfortran/52434 + PR libfortran/48878 + PR libfortran/38199 + * gfortran.dg/edit_real_1.f90: Don't assume roundTiesToAway. + * gfortran.dg/round_1.f03: Likewise. 2012-03-15 Jakub Jelinek Andrew Pinski @@ -1833,7 +1844,7 @@ 2012-02-06 Andrey Belevantsev - * gcc.dg/pr48374.c: Actually add the test I forgot + * gcc.dg/pr48374.c: Actually add the test I forgot in the 2012-01-25 commit. 2012-02-05 Thomas König diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpermd-2.c b/gcc/testsuite/gcc.target/i386/avx2-vpermd-2.c index 4a4e73c78fb5..a663337e9a99 100644 --- a/gcc/testsuite/gcc.target/i386/avx2-vpermd-2.c +++ b/gcc/testsuite/gcc.target/i386/avx2-vpermd-2.c @@ -29,8 +29,8 @@ calc_permd (int *src1, int *src2, int *dst) memcpy (dst, src1, 32); for (i = 0; i < 8; i++) { - temp = src1[i]; - dst[i] = src2[temp & 7]; + temp = src2[i]; + dst[i] = src1[temp & 7]; } } diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpermps-1.c b/gcc/testsuite/gcc.target/i386/avx2-vpermps-1.c index 3346b23531f1..bf436599d54c 100644 --- a/gcc/testsuite/gcc.target/i386/avx2-vpermps-1.c +++ b/gcc/testsuite/gcc.target/i386/avx2-vpermps-1.c @@ -5,9 +5,10 @@ #include __m256 x; +__m256i y; void extern avx2_test (void) { - x = _mm256_permutevar8x32_ps (x, x); + x = _mm256_permutevar8x32_ps (x, y); } diff --git a/gcc/testsuite/gcc.target/i386/avx2-vpermps-2.c b/gcc/testsuite/gcc.target/i386/avx2-vpermps-2.c index f145aa1d2141..4190189a89cd 100644 --- a/gcc/testsuite/gcc.target/i386/avx2-vpermps-2.c +++ b/gcc/testsuite/gcc.target/i386/avx2-vpermps-2.c @@ -8,7 +8,7 @@ #define NUM 10 static void -init_permps (float *src1, float *src2, int seed) +init_permps (float *src1, int *src2, int seed) { int i, sign = 1; @@ -21,24 +21,24 @@ init_permps (float 
*src1, float *src2, int seed) } static void -calc_permps (float *src1, float *src2, float *dst) +calc_permps (float *src1, int *src2, float *dst) { int i; unsigned temp; - unsigned *idx = (int *) src1; memcpy (dst, src1, 32); for (i = 0; i < 8; i++) { - temp = idx[i]; - dst[i] = src2[temp & 7]; + temp = src2[i]; + dst[i] = src1[temp & 7]; } } static void avx2_test (void) { - union256 src1, src2, dst; + union256 src1, dst; + union256i_d src2; float dst_ref[8]; int i;