+2015-06-03 Richard Biener <rguenther@suse.de>
+
+ Backport from mainline
+ 2015-05-26 Michael Matz <matz@suse.de>
+
+ PR middle-end/66251
+ * tree-vect-stmts.c (vect_create_vectorized_demotion_stmts): Always set
+ STMT_VINFO_VEC_STMT, also with SLP.
+
+ 2015-05-22 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66251
+ * tree-vect-stmts.c (vectorizable_conversion): Properly
+ set STMT_VINFO_VEC_STMT even for the SLP case.
+
+ 2015-03-23 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/65518
+ * tree-vect-stmts.c (vectorizable_load): Reject single-element
+	interleaving cases we generate abysmal code for.
+
+ 2015-05-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66123
+ * tree-ssa-dom.c (propagate_rhs_into_lhs): Check if we found
+ a taken edge.
+
+ 2015-05-27 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66272
+ Revert parts of
+ 2014-08-15 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/62031
+ * tree-data-ref.c (dr_analyze_indices): Do not set
+ DR_UNCONSTRAINED_BASE.
+	(dr_may_alias_p): All indirect accesses have to go down the
+ formerly DR_UNCONSTRAINED_BASE path.
+ * tree-data-ref.h (struct indices): Remove
+ unconstrained_base member.
+ (DR_UNCONSTRAINED_BASE): Remove.
+
2015-06-01 Dominik Vogt <vogt@linux.vnet.ibm.com>
Backport from mainline
+2015-06-03 Richard Biener <rguenther@suse.de>
+
+ Backport from mainline
+ 2015-05-26 Michael Matz <matz@suse.de>
+
+ PR middle-end/66251
+ * gcc.dg/vect/pr66251.c: New test.
+
+ 2015-05-22 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66251
+ * gfortran.fortran-torture/compile/pr66251.f90: New testcase.
+
+ 2015-03-23 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/65518
+ * gcc.dg/vect/pr65518.c: New testcase.
+
+ 2015-05-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66123
+ * gcc.dg/torture/pr66123.c: New testcase.
+
+ 2015-05-27 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/66272
+ * gcc.dg/torture/pr66272.c: New testcase.
+
2015-06-01 Jakub Jelinek <jakub@redhat.com>
* gcc.target/s390/hotpatch-compile-15.c: Remove dg-prune-output
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr66123.c
+/* { dg-do compile } */
+
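+/* PR tree-optimization/66123.  DOM's propagate_rhs_into_lhs assumed
+   find_taken_edge always returns an edge; for the computed goto below
+   it can return NULL, which used to crash the compiler.  */
+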
+int
+test (int foo)
+{
+ static void *dummy[] = { &&a, &&b };
+ goto *((char *) &&b - 2 * (foo < 0));
+a:
+b:
+ return 0;
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/torture/pr66272.c
+/* { dg-do run } */
+
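+/* PR tree-optimization/66272.  Offset-based disambiguation applied to
+   accesses whose base could not be constrained wrongly disambiguated
+   the store to a[b] from the load of a[0], so the loop was miscompiled
+   and d.f1 did not end up as 1.  */
+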
+struct S
+{
+ int f0;
+ int f1;
+};
+
+int b;
+
+int main ()
+{
+ struct S a[2] = { 0 };
+ struct S d = { 0, 1 };
+ for (b = 0; b < 2; b++)
+ {
+ a[b] = d;
+ d = a[0];
+ }
+ if (d.f1 != 1)
+ __builtin_abort ();
+ return 0;
+}
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr65518.c
+/* { dg-do run } */
+
+extern void abort (void);
+
+typedef struct giga
+{
+ unsigned int g[16];
+} giga;
+
+unsigned long __attribute__((noinline,noclone))
+addfst(giga const *gptr, int num)
+{
+ unsigned int retval = 0;
+ int i;
+ for (i = 0; i < num; i++)
+ retval += gptr[i].g[0];
+ return retval;
+}
+
+int main ()
+{
+ struct giga g[8];
+ unsigned int n = 1;
+ int i, j;
+ for (i = 0; i < 8; ++i)
+ for (j = 0; j < 16; ++j)
+ {
+ g[i].g[j] = n++;
+ __asm__ volatile ("");
+ }
+ if (addfst (g, 8) != 456)
+ abort ();
+ return 0;
+}
+
+/* We don't want to vectorize the single-element interleaving in the way
+ we currently do it (without ignoring the unneeded vector loads in the
+ gap between gptr[0].g[0] and gptr[1].g[0]), because that is very
+ sub-optimal and causes a memory explosion (even though the cost model
+ should reject it in the end).  */
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops in function" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr66251.c
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_double } */
+/* { dg-require-effective-target vect_floatint_cvt } */
+/* { dg-require-effective-target vect_intfloat_cvt } */
+/* { dg-require-effective-target vect_pack_trunc } */
+/* { dg-require-effective-target vect_unpack } */
+/* { dg-require-effective-target vect_hw_misalign } */
+
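+/* PR middle-end/66251.  The vectorizer failed to record
+   STMT_VINFO_VEC_STMT for SLP-vectorized conversions; this exercises
+   _Complex int <-> _Complex double conversions in both directions
+   over a range of strides.  */
+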
+#include "tree-vect.h"
+
+void __attribute__((noinline,noclone))
+test1(_Complex double *a, _Complex int *b, int stride, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i*stride] = b[i*stride];
+ }
+}
+
+void __attribute__((noinline,noclone))
+test2(_Complex int *a, _Complex double *b, int stride, int n)
+{
+ int i;
+ for (i = 0; i < n; i++)
+ {
+ a[i*stride] = b[i*stride];
+ }
+}
+
+_Complex int ia[256];
+_Complex double da[256];
+
+extern void abort (void);
+
+int main ()
+{
+ int i;
+ int stride;
+
+ check_vect ();
+
+ for (stride = 1; stride < 15; stride++)
+ {
+ for (i = 0; i < 256; i++)
+ {
+ __real__ ia[i] = (i + stride) % 19;
+ __imag__ ia[i] = (i + stride) % 23;
+ __asm__ volatile ("");
+ }
+
+ test1(da, ia, stride, 256/stride);
+
+ for (i = 0; i < 256/stride; i++)
+ {
+ if (da[i*stride] != ia[i*stride])
+ abort ();
+ }
+
+ for (i = 0; i < 256; i++)
+ {
+ __real__ da[i] = (i + stride + 1) % 29;
+ __imag__ da[i] = (i + stride + 1) % 31;
+ __asm__ volatile ("");
+ }
+
+ test2(ia, da, stride, 256/stride);
+
+ for (i = 0; i < 256/stride; i++)
+ {
+ if (da[i*stride] != ia[i*stride])
+ abort ();
+ }
+ }
+ return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
--- /dev/null
+++ b/gcc/testsuite/gfortran.fortran-torture/compile/pr66251.f90
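+! PR middle-end/66251.  SLP-vectorizing the COMPLEX(4) to COMPLEX(8)
+! conversion used to ICE because STMT_VINFO_VEC_STMT was left unset.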
+SUBROUTINE dbcsr_data_convert (n)
+ COMPLEX(KIND=4), DIMENSION(:), POINTER :: s_data_c
+ COMPLEX(KIND=8), DIMENSION(:), POINTER :: t_data_z
+ t_data_z(1:n) = CMPLX(s_data_c(1:n), KIND=8)
+ CALL foo()
+END SUBROUTINE dbcsr_data_convert
+
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
ref = fold_build2_loc (EXPR_LOCATION (ref),
MEM_REF, TREE_TYPE (ref),
base, memoff);
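+	      /* Mark that the base object represents the whole object
+		 rather than just the accessed bytes, so dependence
+		 analysis may not use offset-based disambiguation on it
+		 (PR66272).  */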
+ DR_UNCONSTRAINED_BASE (dr) = true;
access_fns.safe_push (access_fn);
}
}
offset/overlap based analysis but have to rely on points-to
information only. */
if (TREE_CODE (addr_a) == MEM_REF
- && TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME)
+ && (DR_UNCONSTRAINED_BASE (a)
+ || TREE_CODE (TREE_OPERAND (addr_a, 0)) == SSA_NAME))
{
/* For true dependences we can apply TBAA. */
if (flag_strict_aliasing
build_fold_addr_expr (addr_b));
}
else if (TREE_CODE (addr_b) == MEM_REF
- && TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME)
+ && (DR_UNCONSTRAINED_BASE (b)
+ || TREE_CODE (TREE_OPERAND (addr_b, 0)) == SSA_NAME))
{
/* For true dependences we can apply TBAA. */
if (flag_strict_aliasing
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
/* A list of chrecs. Access functions of the indices. */
vec<tree> access_fns;
+
+ /* Whether BASE_OBJECT is an access representing the whole object
+ or whether the access could not be constrained. */
+ bool unconstrained_base;
};
struct dr_alias
#define DR_STMT(DR) (DR)->stmt
#define DR_REF(DR) (DR)->ref
#define DR_BASE_OBJECT(DR) (DR)->indices.base_object
+#define DR_UNCONSTRAINED_BASE(DR) (DR)->indices.unconstrained_base
#define DR_ACCESS_FNS(DR) (DR)->indices.access_fns
#define DR_ACCESS_FN(DR, I) DR_ACCESS_FNS (DR)[I]
#define DR_NUM_DIMENSIONS(DR) DR_ACCESS_FNS (DR).length ()
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
{
basic_block bb = gimple_bb (use_stmt);
edge te = find_taken_edge (bb, val);
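+	  /* find_taken_edge can fail to determine the taken edge,
+	     e.g. for a computed goto (PR66123); there is nothing to
+	     do for this use then.  */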
+ if (!te)
+ continue;
+
edge_iterator ei;
edge e;
gimple_stmt_iterator gsi, psi;
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
(or in STMT_VINFO_RELATED_STMT chain). */
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+
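+	  /* Keep the chain of vectorized statements up to date even in
+	     the SLP case so STMT_VINFO_VEC_STMT is always set (PR66251).  */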
+ if (!*prev_stmt_info)
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
- {
- if (!*prev_stmt_info)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
+ STMT_VINFO_RELATED_STMT (*prev_stmt_info) = new_stmt;
- *prev_stmt_info = vinfo_for_stmt (new_stmt);
- }
+ *prev_stmt_info = vinfo_for_stmt (new_stmt);
}
}
if (slp_node)
SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
+
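+      /* Likewise, set STMT_VINFO_VEC_STMT also in the SLP case
+	 (PR66251).  */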
+ if (!prev_stmt_info)
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
- {
- if (!prev_stmt_info)
- STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
- }
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
}
}
gcc_assert (! nested_in_vect_loop && !STMT_VINFO_GATHER_P (stmt_info));
first_stmt = GROUP_FIRST_ELEMENT (stmt_info);
+
+  /* If this is single-element interleaving with an element distance
+     that leaves unused vector loads around, punt - we at least create
+     very sub-optimal code in that case (and blow up memory,
+     see PR65518).  */
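+  /* For example, in gcc.dg/vect/pr65518.c only g[0] out of each
+     16-element struct giga is loaded: GROUP_SIZE is 16 but a vector
+     holds fewer elements, so most of the loaded vectors would be
+     unused.  */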
+ if (first_stmt == stmt
+ && !GROUP_NEXT_ELEMENT (stmt_info)
+ && GROUP_SIZE (stmt_info) > TYPE_VECTOR_SUBPARTS (vectype))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "single-element interleaving not supported "
+ "for not adjacent vector loads\n");
+ return false;
+ }
+
if (!slp && !PURE_SLP_STMT (stmt_info))
{
group_size = GROUP_SIZE (vinfo_for_stmt (first_stmt));