+2012-04-12 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/52932
+ * config/i386/avx2intrin.h (_mm256_permutevar8x32_ps): Change second
+ argument type to __m256i. Update call to __builtin_ia32_permvarsf256.
+ * config/i386/sse.md (UNSPEC_VPERMVAR): New.
+ (UNSPEC_VPERMSI, UNSPEC_VPERMSF): Remove.
+ (avx2_permvarv8sf, avx2_permvarv8si): Switch operands 1 and 2.
+ (avx2_permvar<mode>): Macroize insn from avx2_permvarv8sf and
+ avx2_permvarv8si using VI4F_256 mode iterator.
+ * config/i386/i386.c (bdesc_args) <__builtin_ia32_permvarsf256>:
+ Update builtin type to V8SF_FTYPE_V8SF_V8SI.
+ (ix86_expand_vec_perm): Update calls to gen_avx2_permvarv8si and
+ gen_avx2_permvarv8sf.
+ (expand_vec_perm_pshufb): Ditto.
+
2012-04-12 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/52775
2012-04-11 Richard Guenther <rguenther@suse.de>
PR middle-end/52918
- * except.c (sjlj_emit_dispatch_table): Properly update loop
- structure.
+ * except.c (sjlj_emit_dispatch_table): Properly update loop structure.
2012-04-11 Nick Clifton <nickc@redhat.com>
* tree-pass.h (tree_lowering_passes): Remove.
* tree-optimize.c (tree_lowering_passes): Remove.
* cgraph.c (cgraph_add_new_function): Inline relevant parts
- of tree_lowering_passes, avoid redundant call of early local
- passes.
+ of tree_lowering_passes, avoid redundant call of early local passes.
* cgraphunit.c (cgraph_lower_function): Fold into ...
(cgraph_analyze_function): ... its single caller. Inline
relevant parts of tree_lowering_passes.
PR lto/52722
PR lto/51765
PR lto/52634
- * lto-cgraph.c (compute_ltrans_boundary): When alias is in the boundary,
- add its target too.
+ * lto-cgraph.c (compute_ltrans_boundary): When alias is in the
+ boundary, add its target too.
* lto.c (add_references_to_partition): Add also aliased nodes.
(add_cgraph_node_to_partition,
add_varpool_node_to_partition): Work on nodes, not functions/variables;
2012-04-05 Teresa Johnson <tejohnson@google.com>
H.J. Lu <hongjiu.lu@intel.com>
- * config/i386/i386.h (ix86_tune_indices): Add
- X86_TUNE_LCP_STALL.
+ * config/i386/i386.h (ix86_tune_indices): Add X86_TUNE_LCP_STALL.
* config/i386/i386.md (move immediate to memory peephole2):
Add cases for HImode move when LCP stall avoidance is needed.
* config/i386/i386.c (initial_ix86_tune_features): Initialize
2012-04-04 Mike Stump <mikestump@comcast.net>
* doc/rtl.texi (const_double): Document as sign-extending.
- * expmed.c (expand_mult): Ensure we don't use shift
- incorrectly.
+ * expmed.c (expand_mult): Ensure we don't use shift incorrectly.
* emit-rtl.c (immed_double_int_const): Refine to state the
value is signed.
* simplify-rtx.c (mode_signbit_p): Add a fixme for wider than
(simplify_const_unary_operation, UNSIGNED_FLOAT): Ensure no
negative values are converted. Fix conversions bigger than
HOST_BITS_PER_WIDE_INT.
- (simplify_binary_operation_1): Ensure we don't use shift
- incorrectly.
+ (simplify_binary_operation_1): Ensure we don't use shift incorrectly.
(simplify_immed_subreg): Sign-extend CONST_DOUBLEs.
* explow.c (plus_constant_mode): Add.
(plus_constant): Implement with plus_constant_mode.
2012-04-04 Richard Guenther <rguenther@suse.de>
PR tree-optimization/52808
- * tracer.c (tail_duplicate): Do not tail-duplicate loop header
- blocks.
+ * tracer.c (tail_duplicate): Do not tail-duplicate loop header blocks.
* Makefile.in (tracer.o): Depend on $(CFGLOOP_H).
2012-04-04 Tristan Gingold <gingold@adacore.com>
* h8300/h8300.md: Generate 'rte' for monitor functions. Do not
save EXR on stack for monitor function in case of H8S target
when "-mno-exr" is passed.
- * h8300/h8300-protos.h
- (h8300_current_function_monitor_function_p): Add prototype.
+ * h8300/h8300-protos.h (h8300_current_function_monitor_function_p):
+ Add prototype.
* doc/invoke.texi: Document H8S options.
2012-04-03 Tristan Gingold <gingold@adacore.com>
extern __inline __m256
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_permutevar8x32_ps (__m256 __X, __m256 __Y)
+_mm256_permutevar8x32_ps (__m256 __X, __m256i __Y)
{
- return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X,(__v8sf)__Y);
+ return (__m256) __builtin_ia32_permvarsf256 ((__v8sf)__X, (__v8si)__Y);
}
#ifdef __OPTIMIZE__
vt = force_reg (maskmode, vt);
mask = gen_lowpart (maskmode, mask);
if (maskmode == V8SImode)
- emit_insn (gen_avx2_permvarv8si (t1, vt, mask));
+ emit_insn (gen_avx2_permvarv8si (t1, mask, vt));
else
emit_insn (gen_avx2_pshufbv32qi3 (t1, mask, vt));
the high bits of the shuffle elements. No need for us to
perform an AND ourselves. */
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8si (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SImode);
t2 = gen_reg_rtx (V8SImode);
- emit_insn (gen_avx2_permvarv8si (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8si (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8si (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8si (t2, op1, mask));
goto merge_two;
}
return;
case V8SFmode:
mask = gen_lowpart (V8SFmode, mask);
if (one_operand_shuffle)
- emit_insn (gen_avx2_permvarv8sf (target, mask, op0));
+ emit_insn (gen_avx2_permvarv8sf (target, op0, mask));
else
{
t1 = gen_reg_rtx (V8SFmode);
t2 = gen_reg_rtx (V8SFmode);
- emit_insn (gen_avx2_permvarv8sf (t1, mask, op0));
- emit_insn (gen_avx2_permvarv8sf (t2, mask, op1));
+ emit_insn (gen_avx2_permvarv8sf (t1, op0, mask));
+ emit_insn (gen_avx2_permvarv8sf (t2, op1, mask));
goto merge_two;
}
return;
t2 = gen_reg_rtx (V8SImode);
emit_insn (gen_avx_vec_concatv8si (t1, op0, op1));
emit_insn (gen_avx_vec_concatv8si (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8si (t1, t2, t1));
+ emit_insn (gen_avx2_permvarv8si (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8si (target, t1, const0_rtx));
return;
mask = gen_lowpart (V4SFmode, mask);
emit_insn (gen_avx_vec_concatv8sf (t1, op0, op1));
emit_insn (gen_avx_vec_concatv8sf (t2, mask, mask));
- emit_insn (gen_avx2_permvarv8sf (t1, t2, t1));
+ emit_insn (gen_avx2_permvarv8sf (t1, t1, t2));
emit_insn (gen_avx_vextractf128v8sf (target, t1, const0_rtx));
return;
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv4si, "__builtin_ia32_pbroadcastd128", IX86_BUILTIN_PBROADCASTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pbroadcastv2di, "__builtin_ia32_pbroadcastq128", IX86_BUILTIN_PBROADCASTQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8si, "__builtin_ia32_permvarsi256", IX86_BUILTIN_VPERMVARSI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
+ { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4df, "__builtin_ia32_permdf256", IX86_BUILTIN_VPERMDF256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
- { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permvarv8sf, "__builtin_ia32_permvarsf256", IX86_BUILTIN_VPERMVARSF256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv4di, "__builtin_ia32_permdi256", IX86_BUILTIN_VPERMDI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_permv2ti, "__builtin_ia32_permti256", IX86_BUILTIN_VPERMTI256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT },
{ OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_extracti128, "__builtin_ia32_extract128i256", IX86_BUILTIN_VEXTRACT128I256, UNKNOWN, (int) V2DI_FTYPE_V4DI_INT },
else if (vmode == V32QImode)
emit_insn (gen_avx2_pshufbv32qi3 (target, op0, vperm));
else if (vmode == V8SFmode)
- emit_insn (gen_avx2_permvarv8sf (target, vperm, op0));
+ emit_insn (gen_avx2_permvarv8sf (target, op0, vperm));
else
- emit_insn (gen_avx2_permvarv8si (target, vperm, op0));
+ emit_insn (gen_avx2_permvarv8si (target, op0, vperm));
}
else
{
UNSPEC_VCVTPS2PH
;; For AVX2 support
- UNSPEC_VPERMSI
- UNSPEC_VPERMSF
+ UNSPEC_VPERMVAR
UNSPEC_VPERMTI
UNSPEC_GATHER
UNSPEC_VSIBADDR
(set_attr "prefix" "vex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn "avx2_permvarv8si"
- [(set (match_operand:V8SI 0 "register_operand" "=x")
- (unspec:V8SI
- [(match_operand:V8SI 1 "register_operand" "x")
- (match_operand:V8SI 2 "nonimmediate_operand" "xm")]
- UNSPEC_VPERMSI))]
- "TARGET_AVX2"
- "vpermd\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "sselog")
- (set_attr "prefix" "vex")
- (set_attr "mode" "OI")])
-
-(define_insn "avx2_permvarv8sf"
- [(set (match_operand:V8SF 0 "register_operand" "=x")
- (unspec:V8SF
- [(match_operand:V8SF 1 "register_operand" "x")
- (match_operand:V8SF 2 "nonimmediate_operand" "xm")]
- UNSPEC_VPERMSF))]
+(define_insn "avx2_permvar<mode>"
+ [(set (match_operand:VI4F_256 0 "register_operand" "=x")
+ (unspec:VI4F_256
+ [(match_operand:VI4F_256 1 "nonimmediate_operand" "xm")
+ (match_operand:V8SI 2 "register_operand" "x")]
+ UNSPEC_VPERMVAR))]
"TARGET_AVX2"
- "vpermps\t{%2, %1, %0|%0, %1, %2}"
+ "vperm<ssemodesuffix>\t{%1, %2, %0|%0, %2, %1}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "OI")])
+2012-04-12 Uros Bizjak <ubizjak@gmail.com>
+
+ PR target/52932
+ * gcc.target/i386/avx2-vpermps-1.c (avx2_test): Use __m256i type for
+ second function argument.
+ * gcc.target/i386/avx2-vpermps-2.c (init_permps): Update declaration.
+ (calc_permps): Update declaration. Calculate result correctly.
+ (avx2_test): Change src2 type to union256i_d.
+ * gcc.target/i386/avx2-vpermd-2.c (calc_permd): Calculate result
+ correctly.
+
2012-04-12 Michael Meissner <meissner@linux.vnet.ibm.com>
PR target/52775
PR lto/52722
PR lto/51765
- PR lto/52634
+ PR lto/52634
* gcc.dg/lto/pr52634_1.c: New testcase.
* gcc.dg/lto/pr52634_0.c: New testcase.
int32plus.
* gcc.dg/torture/pr48124-4.c: Ditto:
* gcc.dg/torture/pr52530.c: Use long instead of int if int=16.
-
+
2012-03-20 Jason Merrill <jason@redhat.com>
PR c++/52510
2012-03-15 Janne Blomqvist <jb@gcc.gnu.org>
- PR libfortran/52434
- PR libfortran/48878
- PR libfortran/38199
- * gfortran.dg/edit_real_1.f90: Don't assume roundTiesToAway.
- * gfortran.dg/round_1.f03: Likewise.
+ PR libfortran/52434
+ PR libfortran/48878
+ PR libfortran/38199
+ * gfortran.dg/edit_real_1.f90: Don't assume roundTiesToAway.
+ * gfortran.dg/round_1.f03: Likewise.
2012-03-15 Jakub Jelinek <jakub@redhat.com>
Andrew Pinski <apinski@cavium.com>
2012-02-06 Andrey Belevantsev <abel@ispras.ru>
- * gcc.dg/pr48374.c: Actually add the test I forgot
+ * gcc.dg/pr48374.c: Actually add the test I forgot
in the 2012-01-25 commit.
2012-02-05 Thomas König <tkoenig@gcc.gnu.org>
memcpy (dst, src1, 32);
for (i = 0; i < 8; i++)
{
- temp = src1[i];
- dst[i] = src2[temp & 7];
+ temp = src2[i];
+ dst[i] = src1[temp & 7];
}
}
#include <immintrin.h>
__m256 x;
+__m256i y;
void extern
avx2_test (void)
{
- x = _mm256_permutevar8x32_ps (x, x);
+ x = _mm256_permutevar8x32_ps (x, y);
}
#define NUM 10
static void
-init_permps (float *src1, float *src2, int seed)
+init_permps (float *src1, int *src2, int seed)
{
int i, sign = 1;
}
static void
-calc_permps (float *src1, float *src2, float *dst)
+calc_permps (float *src1, int *src2, float *dst)
{
int i;
unsigned temp;
- unsigned *idx = (int *) src1;
memcpy (dst, src1, 32);
for (i = 0; i < 8; i++)
{
- temp = idx[i];
- dst[i] = src2[temp & 7];
+ temp = src2[i];
+ dst[i] = src1[temp & 7];
}
}
static void
avx2_test (void)
{
- union256 src1, src2, dst;
+ union256 src1, dst;
+ union256i_d src2;
float dst_ref[8];
int i;