Add support for SVE gather loads
author    Richard Sandiford <richard.sandiford@linaro.org>
          Sat, 13 Jan 2018 18:01:34 +0000 (18:01 +0000)
committer Richard Sandiford <rsandifo@gcc.gnu.org>
          Sat, 13 Jan 2018 18:01:34 +0000 (18:01 +0000)
This patch adds support for SVE gather loads.  It uses basically
the same analysis code as the AVX gather support, but after that
there are two major differences:

- It uses new internal functions rather than target built-ins.
  The interface is:

     IFN_GATHER_LOAD (base, offsets, scale)
     IFN_MASK_GATHER_LOAD (base, offsets, scale, mask)

  which should be reasonably generic.  One of the advantages of
  using internal functions is that other passes can understand what
  the functions do, but a more immediate advantage is that we can
  query the underlying target pattern to see which scales it
  supports.  (A scalar C model of both calls is sketched after
  this list.)

- It uses pattern recognition to convert the offset to the right width,
  if it was originally narrower than that.  This avoids having to do
  a widening operation as part of the gather expansion itself.
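
For reference, the semantics of the two calls can be modelled in
scalar C roughly as follows.  This is only an illustrative sketch:
the function and parameter names are invented here and are not part
of the patch.

   #include <stddef.h>
   #include <stdint.h>

   /* Scalar model of IFN_MASK_GATHER_LOAD for 32-bit data and 32-bit
      offsets: each offset is extended to address width, multiplied by
      SCALE and added to BASE; inactive elements are zeroed.
      IFN_GATHER_LOAD behaves as if the mask were all-true.  */
   static void
   mask_gather_load_model (int32_t *dest, const char *base,
                           const int32_t *offsets, int64_t scale,
                           const _Bool *mask, size_t nelts)
   {
     for (size_t i = 0; i < nelts; ++i)
       dest[i] = (mask[i]
                  ? *(const int32_t *) (base + (int64_t) offsets[i] * scale)
                  : 0);
   }

For offsets that start out narrower than the vector elements (such as
the int16_t indices in gather_load_6.c below), the new pattern
recognizer widens the offset vector first, so the internal functions
always see full-width offsets.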

2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
    Alan Hayward  <alan.hayward@arm.com>
    David Sherwood  <david.sherwood@arm.com>

gcc/
* doc/md.texi (gather_load@var{m}): Document.
(mask_gather_load@var{m}): Likewise.
* genopinit.c (main): Add supports_vec_gather_load and
supports_vec_gather_load_cached to target_optabs.
* optabs-tree.c (init_tree_optimization_optabs): Use
ggc_cleared_alloc to allocate target_optabs.
* optabs.def (gather_load_optab, mask_gather_load_optab): New optabs.
* internal-fn.def (GATHER_LOAD, MASK_GATHER_LOAD): New internal
functions.
* internal-fn.h (internal_load_fn_p): Declare.
(internal_gather_scatter_fn_p): Likewise.
(internal_fn_mask_index): Likewise.
(internal_gather_scatter_fn_supported_p): Likewise.
* internal-fn.c (gather_load_direct): New macro.
(expand_gather_load_optab_fn): New function.
(direct_gather_load_optab_supported_p): New macro.
(direct_internal_fn_optab): New function.
(internal_load_fn_p): Likewise.
(internal_gather_scatter_fn_p): Likewise.
(internal_fn_mask_index): Likewise.
(internal_gather_scatter_fn_supported_p): Likewise.
* optabs-query.c (supports_at_least_one_mode_p): New function.
(supports_vec_gather_load_p): Likewise.
* optabs-query.h (supports_vec_gather_load_p): Declare.
* tree-vectorizer.h (gather_scatter_info): Add ifn, element_type
and memory_type fields.
(NUM_PATTERNS): Bump to 15.
* tree-vect-data-refs.c: Include internal-fn.h.
(vect_gather_scatter_fn_p): New function.
(vect_describe_gather_scatter_call): Likewise.
(vect_check_gather_scatter): Try using internal functions for
gather loads.  Recognize existing calls to a gather load function.
(vect_analyze_data_refs): Consider using gather loads if
supports_vec_gather_load_p.
* tree-vect-patterns.c (vect_get_load_store_mask): New function.
(vect_get_gather_scatter_offset_type): Likewise.
(vect_convert_mask_for_vectype): Likewise.
(vect_add_conversion_to_patterm): Likewise.
(vect_try_gather_scatter_pattern): Likewise.
(vect_recog_gather_scatter_pattern): New pattern recognizer.
(vect_vect_recog_func_ptrs): Add it.
* tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use
internal_fn_mask_index and internal_gather_scatter_fn_p.
(check_load_store_masking): Take the gather_scatter_info as an
argument and handle gather loads.
(vect_get_gather_scatter_ops): New function.
(vectorizable_call): Check internal_load_fn_p.
(vectorizable_load): Likewise.  Handle gather load internal
functions.
(vectorizable_store): Update call to check_load_store_masking.
* config/aarch64/aarch64.md (UNSPEC_LD1_GATHER): New unspec.
* config/aarch64/iterators.md (SVE_S, SVE_D): New mode iterators.
* config/aarch64/predicates.md (aarch64_gather_scale_operand_w)
(aarch64_gather_scale_operand_d): New predicates.
* config/aarch64/aarch64-sve.md (gather_load<mode>): New expander.
(mask_gather_load<mode>): New insns.

gcc/testsuite/
* gcc.target/aarch64/sve/gather_load_1.c: New test.
* gcc.target/aarch64/sve/gather_load_2.c: Likewise.
* gcc.target/aarch64/sve/gather_load_3.c: Likewise.
* gcc.target/aarch64/sve/gather_load_4.c: Likewise.
* gcc.target/aarch64/sve/gather_load_5.c: Likewise.
* gcc.target/aarch64/sve/gather_load_6.c: Likewise.
* gcc.target/aarch64/sve/gather_load_7.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_1.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_5.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_6.c: Likewise.
* gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise.

Co-Authored-By: Alan Hayward <alan.hayward@arm.com>
Co-Authored-By: David Sherwood <david.sherwood@arm.com>
From-SVN: r256640

33 files changed:
gcc/ChangeLog
gcc/config/aarch64/aarch64-sve.md
gcc/config/aarch64/aarch64.md
gcc/config/aarch64/iterators.md
gcc/config/aarch64/predicates.md
gcc/doc/md.texi
gcc/genopinit.c
gcc/internal-fn.c
gcc/internal-fn.def
gcc/internal-fn.h
gcc/optabs-query.c
gcc/optabs-query.h
gcc/optabs-tree.c
gcc/optabs.def
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_1.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_2.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_3.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_4.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_5.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c [new file with mode: 0644]
gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c [new file with mode: 0644]
gcc/tree-vect-data-refs.c
gcc/tree-vect-patterns.c
gcc/tree-vect-stmts.c
gcc/tree-vectorizer.h

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index a5daf352ca1b84138bae0a05d3ebf4a9874b5248..8c808374bbc8184ec3263f8d0bce663d82857214 100644
@@ -1,3 +1,64 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * doc/md.texi (gather_load@var{m}): Document.
+       (mask_gather_load@var{m}): Likewise.
+       * genopinit.c (main): Add supports_vec_gather_load and
+       supports_vec_gather_load_cached to target_optabs.
+       * optabs-tree.c (init_tree_optimization_optabs): Use
+       ggc_cleared_alloc to allocate target_optabs.
+       * optabs.def (gather_load_optab, mask_gather_load_optab): New optabs.
+       * internal-fn.def (GATHER_LOAD, MASK_GATHER_LOAD): New internal
+       functions.
+       * internal-fn.h (internal_load_fn_p): Declare.
+       (internal_gather_scatter_fn_p): Likewise.
+       (internal_fn_mask_index): Likewise.
+       (internal_gather_scatter_fn_supported_p): Likewise.
+       * internal-fn.c (gather_load_direct): New macro.
+       (expand_gather_load_optab_fn): New function.
+       (direct_gather_load_optab_supported_p): New macro.
+       (direct_internal_fn_optab): New function.
+       (internal_load_fn_p): Likewise.
+       (internal_gather_scatter_fn_p): Likewise.
+       (internal_fn_mask_index): Likewise.
+       (internal_gather_scatter_fn_supported_p): Likewise.
+       * optabs-query.c (supports_at_least_one_mode_p): New function.
+       (supports_vec_gather_load_p): Likewise.
+       * optabs-query.h (supports_vec_gather_load_p): Declare.
+       * tree-vectorizer.h (gather_scatter_info): Add ifn, element_type
+       and memory_type fields.
+       (NUM_PATTERNS): Bump to 15.
+       * tree-vect-data-refs.c: Include internal-fn.h.
+       (vect_gather_scatter_fn_p): New function.
+       (vect_describe_gather_scatter_call): Likewise.
+       (vect_check_gather_scatter): Try using internal functions for
+       gather loads.  Recognize existing calls to a gather load function.
+       (vect_analyze_data_refs): Consider using gather loads if
+       supports_vec_gather_load_p.
+       * tree-vect-patterns.c (vect_get_load_store_mask): New function.
+       (vect_get_gather_scatter_offset_type): Likewise.
+       (vect_convert_mask_for_vectype): Likewise.
+       (vect_add_conversion_to_patterm): Likewise.
+       (vect_try_gather_scatter_pattern): Likewise.
+       (vect_recog_gather_scatter_pattern): New pattern recognizer.
+       (vect_vect_recog_func_ptrs): Add it.
+       * tree-vect-stmts.c (exist_non_indexing_operands_for_use_p): Use
+       internal_fn_mask_index and internal_gather_scatter_fn_p.
+       (check_load_store_masking): Take the gather_scatter_info as an
+       argument and handle gather loads.
+       (vect_get_gather_scatter_ops): New function.
+       (vectorizable_call): Check internal_load_fn_p.
+       (vectorizable_load): Likewise.  Handle gather load internal
+       functions.
+       (vectorizable_store): Update call to check_load_store_masking.
+       * config/aarch64/aarch64.md (UNSPEC_LD1_GATHER): New unspec.
+       * config/aarch64/iterators.md (SVE_S, SVE_D): New mode iterators.
+       * config/aarch64/predicates.md (aarch64_gather_scale_operand_w)
+       (aarch64_gather_scale_operand_d): New predicates.
+       * config/aarch64/aarch64-sve.md (gather_load<mode>): New expander.
+       (mask_gather_load<mode>): New insns.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 08956b9ce4ba4ddd2d7b6873b11e6ce14d59a57e..04ea25c0a87dd85c5199015a73ecb98b59a0edd3 100644
   "st1<Vesize>\t%1.<Vetype>, %2, %0"
 )
 
+;; Unpredicated gather loads.
+(define_expand "gather_load<mode>"
+  [(set (match_operand:SVE_SD 0 "register_operand")
+       (unspec:SVE_SD
+         [(match_dup 5)
+          (match_operand:DI 1 "aarch64_reg_or_zero")
+          (match_operand:<V_INT_EQUIV> 2 "register_operand")
+          (match_operand:DI 3 "const_int_operand")
+          (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
+          (mem:BLK (scratch))]
+         UNSPEC_LD1_GATHER))]
+  "TARGET_SVE"
+  {
+    operands[5] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
+  }
+)
+
+;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
+;; unsigned extension and false for signed extension.
+(define_insn "mask_gather_load<mode>"
+  [(set (match_operand:SVE_S 0 "register_operand" "=w, w, w, w, w")
+       (unspec:SVE_S
+         [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl, Upl, Upl")
+          (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk, rk, rk")
+          (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w, w, w")
+          (match_operand:DI 3 "const_int_operand" "i, Z, Ui1, Z, Ui1")
+          (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, i, i")
+          (mem:BLK (scratch))]
+         UNSPEC_LD1_GATHER))]
+  "TARGET_SVE"
+  "@
+   ld1w\t%0.s, %5/z, [%2.s]
+   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw]
+   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw]
+   ld1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
+   ld1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
+)
+
+;; Predicated gather loads for 64-bit elements.  The value of operand 3
+;; doesn't matter in this case.
+(define_insn "mask_gather_load<mode>"
+  [(set (match_operand:SVE_D 0 "register_operand" "=w, w, w")
+       (unspec:SVE_D
+         [(match_operand:<VPRED> 5 "register_operand" "Upl, Upl, Upl")
+          (match_operand:DI 1 "aarch64_reg_or_zero" "Z, rk, rk")
+          (match_operand:<V_INT_EQUIV> 2 "register_operand" "w, w, w")
+          (match_operand:DI 3 "const_int_operand")
+          (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, i")
+          (mem:BLK (scratch))]
+         UNSPEC_LD1_GATHER))]
+  "TARGET_SVE"
+  "@
+   ld1d\t%0.d, %5/z, [%2.d]
+   ld1d\t%0.d, %5/z, [%1, %2.d]
+   ld1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
+)
+
 ;; SVE structure moves.
 (define_expand "mov<mode>"
   [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4a73ccc7c3f60af01d180ae2461dd6cb11333432..455e04d46233d613f3c3e520d2b906ce5f1dc755 100644
     UNSPEC_LD1_SVE
     UNSPEC_ST1_SVE
     UNSPEC_LD1RQ
+    UNSPEC_LD1_GATHER
     UNSPEC_MERGE_PTRUE
     UNSPEC_PTEST_PTRUE
     UNSPEC_UNPACKSHI
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c380b3bfecf9da67bb16fd72bf59afc8a0af9b33..9c1c9dabdd9ca50802ffa21efe6d11314f6503d6 100644
 ;; All SVE vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_SD [VNx4SI VNx2DI VNx4SF VNx2DF])
 
+;; All SVE vector modes that have 32-bit elements.
+(define_mode_iterator SVE_S [VNx4SI VNx4SF])
+
+;; All SVE vector modes that have 64-bit elements.
+(define_mode_iterator SVE_D [VNx2DI VNx2DF])
+
 ;; All SVE integer vector modes that have 32-bit or 64-bit elements.
 (define_mode_iterator SVE_SDI [VNx4SI VNx2DI])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 701789a08d13eb760d2a214c36920905eb8e8391..0df1a4d324c329848cbb447372dc7712eb34840e 100644
 (define_predicate "aarch64_sve_vec_perm_operand"
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_constant_vector_operand")))
+
+(define_predicate "aarch64_gather_scale_operand_w"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == 1 || INTVAL (op) == 4")))
+
+(define_predicate "aarch64_gather_scale_operand_d"
+  (and (match_code "const_int")
+       (match_test "INTVAL (op) == 1 || INTVAL (op) == 8")))
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 4527b44d1af1b026d06125a4079c5196c33ecf33..245fa90e6d14d13aca3abf8c1f87599276a77b04 100644
@@ -4908,6 +4908,35 @@ for (j = 0; j < GET_MODE_NUNITS (@var{n}); j++)
 
 This pattern is not allowed to @code{FAIL}.
 
+@cindex @code{gather_load@var{m}} instruction pattern
+@item @samp{gather_load@var{m}}
+Load several separate memory locations into a vector of mode @var{m}.
+Operand 1 is a scalar base address and operand 2 is a vector of
+offsets from that base.  Operand 0 is a destination vector with the
+same number of elements as the offset.  For each element index @var{i}:
+
+@itemize @bullet
+@item
+extend the offset element @var{i} to address width, using zero
+extension if operand 3 is 1 and sign extension if operand 3 is zero;
+@item
+multiply the extended offset by operand 4;
+@item
+add the result to the base; and
+@item
+load the value at that address into element @var{i} of operand 0.
+@end itemize
+
+The value of operand 3 does not matter if the offsets are already
+address width.
+
+@cindex @code{mask_gather_load@var{m}} instruction pattern
+@item @samp{mask_gather_load@var{m}}
+Like @samp{gather_load@var{m}}, but takes an extra mask operand as
+operand 5.  Bit @var{i} of the mask is set if element @var{i}
+of the result should be loaded from memory and clear if element @var{i}
+of the result should be set to zero.
+
 @cindex @code{vec_set@var{m}} instruction pattern
 @item @samp{vec_set@var{m}}
 Set given field in the vector value.  Operand 0 is the vector to modify,
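
(An illustrative worked example of the @samp{gather_load@var{m}}
addressing steps documented in the hunk above, not taken from the
patch itself: with base address 0x1000, a 32-bit signed offset
element of -2, operand 3 equal to 0 (sign extension) and operand 4
equal to 4, element @var{i} is loaded from
0x1000 + (int64_t) -2 * 4 = 0xff8.)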
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 42a5ae545986e6d5bb69e95fa716b47d3704daf0..dd4e3aa03905b7e60dec332630a3c8a2abd87758 100644
@@ -234,6 +234,11 @@ main (int argc, const char **argv)
           "struct target_optabs {\n"
           "  /* Patterns that are used by optabs that are enabled for this target.  */\n"
           "  bool pat_enable[NUM_OPTAB_PATTERNS];\n"
+          "\n"
+          "  /* Cache if the target supports vec_gather_load for at least one vector\n"
+          "     mode.  */\n"
+          "  bool supports_vec_gather_load;\n"
+          "  bool supports_vec_gather_load_cached;\n"
           "};\n"
           "extern void init_all_optabs (struct target_optabs *);\n"
           "\n"
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 42cdf1345e560b297cf1b83e3fcc5b88e2109c84..8cf5b79c0289c1741ff5a5c9a40b0cb12e4cf96d 100644
@@ -83,6 +83,7 @@ init_internal_fns ()
 #define mask_load_direct { -1, 2, false }
 #define load_lanes_direct { -1, -1, false }
 #define mask_load_lanes_direct { -1, -1, false }
+#define gather_load_direct { -1, -1, false }
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
@@ -2729,6 +2730,38 @@ expand_LAUNDER (internal_fn, gcall *call)
   expand_assignment (lhs, gimple_call_arg (call, 0), false);
 }
 
+/* Expand {MASK_,}GATHER_LOAD call CALL using optab OPTAB.  */
+
+static void
+expand_gather_load_optab_fn (internal_fn, gcall *stmt, direct_optab optab)
+{
+  tree lhs = gimple_call_lhs (stmt);
+  tree base = gimple_call_arg (stmt, 0);
+  tree offset = gimple_call_arg (stmt, 1);
+  tree scale = gimple_call_arg (stmt, 2);
+
+  rtx lhs_rtx = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  rtx base_rtx = expand_normal (base);
+  rtx offset_rtx = expand_normal (offset);
+  HOST_WIDE_INT scale_int = tree_to_shwi (scale);
+
+  int i = 0;
+  struct expand_operand ops[6];
+  create_output_operand (&ops[i++], lhs_rtx, TYPE_MODE (TREE_TYPE (lhs)));
+  create_address_operand (&ops[i++], base_rtx);
+  create_input_operand (&ops[i++], offset_rtx, TYPE_MODE (TREE_TYPE (offset)));
+  create_integer_operand (&ops[i++], TYPE_UNSIGNED (TREE_TYPE (offset)));
+  create_integer_operand (&ops[i++], scale_int);
+  if (optab == mask_gather_load_optab)
+    {
+      tree mask = gimple_call_arg (stmt, 3);
+      rtx mask_rtx = expand_normal (mask);
+      create_input_operand (&ops[i++], mask_rtx, TYPE_MODE (TREE_TYPE (mask)));
+    }
+  insn_code icode = direct_optab_handler (optab, TYPE_MODE (TREE_TYPE (lhs)));
+  expand_insn (icode, i, ops);
+}
+
 /* Expand DIVMOD() using:
  a) optab handler for udivmod/sdivmod if it is available.
  b) If optab_handler doesn't exist, generate call to
@@ -2979,6 +3012,7 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_load_optab_supported_p direct_optab_supported_p
 #define direct_load_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_load_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_gather_load_optab_supported_p direct_optab_supported_p
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
@@ -3010,6 +3044,25 @@ direct_internal_fn_optab (internal_fn fn, tree_pair types)
   gcc_unreachable ();
 }
 
+/* Return the optab used by internal function FN.  */
+
+static optab
+direct_internal_fn_optab (internal_fn fn)
+{
+  switch (fn)
+    {
+#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
+    case IFN_##CODE: break;
+#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
+    case IFN_##CODE: return OPTAB##_optab;
+#include "internal-fn.def"
+
+    case IFN_LAST:
+      break;
+    }
+  gcc_unreachable ();
+}
+
 /* Return true if FN is supported for the types in TYPES when the
    optimization type is OPT_TYPE.  The types are those associated with
    the "type0" and "type1" fields of FN's direct_internal_fn_info
@@ -3130,6 +3183,87 @@ get_conditional_internal_fn (tree_code code)
     }
 }
 
+/* Return true if IFN is some form of load from memory.  */
+
+bool
+internal_load_fn_p (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_MASK_LOAD:
+    case IFN_LOAD_LANES:
+    case IFN_MASK_LOAD_LANES:
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+      return true;
+
+    default:
+      return false;
+    }
+}
+
+/* Return true if IFN is some form of gather load or scatter store.  */
+
+bool
+internal_gather_scatter_fn_p (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_GATHER_LOAD:
+    case IFN_MASK_GATHER_LOAD:
+      return true;
+
+    default:
+      return false;
+    }
+}
+
+/* If FN takes a vector mask argument, return the index of that argument,
+   otherwise return -1.  */
+
+int
+internal_fn_mask_index (internal_fn fn)
+{
+  switch (fn)
+    {
+    case IFN_MASK_LOAD:
+    case IFN_MASK_LOAD_LANES:
+    case IFN_MASK_STORE:
+    case IFN_MASK_STORE_LANES:
+      return 2;
+
+    case IFN_MASK_GATHER_LOAD:
+      return 3;
+
+    default:
+      return -1;
+    }
+}
+
+/* Return true if the target supports gather load or scatter store function
+   IFN.  For loads, VECTOR_TYPE is the vector type of the load result,
+   while for stores it is the vector type of the stored data argument.
+   MEMORY_ELEMENT_TYPE is the type of the memory elements being loaded
+   or stored.  OFFSET_SIGN is the sign of the offset argument, which is
+   only relevant when the offset is narrower than an address.  SCALE is
+   the amount by which the offset should be multiplied *after* it has
+   been extended to address width.  */
+
+bool
+internal_gather_scatter_fn_supported_p (internal_fn ifn, tree vector_type,
+                                       tree memory_element_type,
+                                       signop offset_sign, int scale)
+{
+  if (!tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vector_type)),
+                          TYPE_SIZE (memory_element_type)))
+    return false;
+  optab optab = direct_internal_fn_optab (ifn);
+  insn_code icode = direct_optab_handler (optab, TYPE_MODE (vector_type));
+  return (icode != CODE_FOR_nothing
+         && insn_operand_matches (icode, 3, GEN_INT (offset_sign == UNSIGNED))
+         && insn_operand_matches (icode, 4, GEN_INT (scale)));
+}
+
 /* Expand STMT as though it were a call to internal function FN.  */
 
 void
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 36bcf885bf78d000420cc9e3319a3a2d2e684f18..db81b83eb5b4120a65f1260f2a5583e81784bedd 100644
@@ -48,6 +48,7 @@ along with GCC; see the file COPYING3.  If not see
    - mask_load: currently just maskload
    - load_lanes: currently just vec_load_lanes
    - mask_load_lanes: currently just vec_mask_load_lanes
+   - gather_load: used for {mask_,}gather_load
 
    - mask_store: currently just maskstore
    - store_lanes: currently just vec_store_lanes
@@ -118,6 +119,10 @@ DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
                       vec_mask_load_lanes, mask_load_lanes)
 
+DEF_INTERNAL_OPTAB_FN (GATHER_LOAD, ECF_PURE, gather_load, gather_load)
+DEF_INTERNAL_OPTAB_FN (MASK_GATHER_LOAD, ECF_PURE,
+                      mask_gather_load, gather_load)
+
 DEF_INTERNAL_OPTAB_FN (MASK_STORE, 0, maskstore, mask_store)
 DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index 0464e3f9e7a6229647e0eaeeaa9e521a4a8e48b8..c536d1ff5cb6ae26732e152e578472ab7eeb3aee 100644
@@ -192,6 +192,12 @@ extern bool set_edom_supported_p (void);
 
 extern internal_fn get_conditional_internal_fn (tree_code);
 
+extern bool internal_load_fn_p (internal_fn);
+extern bool internal_gather_scatter_fn_p (internal_fn);
+extern int internal_fn_mask_index (internal_fn);
+extern bool internal_gather_scatter_fn_supported_p (internal_fn, tree,
+                                                   tree, signop, int);
+
 extern void expand_internal_call (gcall *);
 extern void expand_internal_call (internal_fn, gcall *);
 extern void expand_PHI (internal_fn, gcall *);
diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
index d70380605b2737c69f5409f55a22fd9c8d1bd600..b5c7a76e67e415a4ae23a0991547a7ddae51b957 100644
@@ -682,3 +682,32 @@ lshift_cheap_p (bool speed_p)
 
   return cheap[speed_p];
 }
+
+/* Return true if optab OP supports at least one mode.  */
+
+static bool
+supports_at_least_one_mode_p (optab op)
+{
+  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
+    if (direct_optab_handler (op, (machine_mode) i) != CODE_FOR_nothing)
+      return true;
+
+  return false;
+}
+
+/* Return true if vec_gather_load is available for at least one vector
+   mode.  */
+
+bool
+supports_vec_gather_load_p ()
+{
+  if (this_fn_optabs->supports_vec_gather_load_cached)
+    return this_fn_optabs->supports_vec_gather_load;
+
+  this_fn_optabs->supports_vec_gather_load_cached = true;
+
+  this_fn_optabs->supports_vec_gather_load
+    = supports_at_least_one_mode_p (gather_load_optab);
+
+  return this_fn_optabs->supports_vec_gather_load;
+}
diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
index 3cba27af255e241a0db4856fac27c3414085fa16..2b02d07b1be98a37d736ea1bf928f0e91939f353 100644
@@ -191,6 +191,7 @@ bool can_compare_and_swap_p (machine_mode, bool);
 bool can_atomic_exchange_p (machine_mode, bool);
 bool can_atomic_load_p (machine_mode);
 bool lshift_cheap_p (bool);
+bool supports_vec_gather_load_p ();
 
 /* Version of find_widening_optab_handler_and_mode that operates on
    specific mode types.  */
diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c
index 60119cb442ea984306f64889e4054ab343cc9611..71e172c4900a28c793cdfc1c32e07d08d6656d60 100644
@@ -358,7 +358,7 @@ init_tree_optimization_optabs (tree optnode)
   if (tmp_optabs)
     memset (tmp_optabs, 0, sizeof (struct target_optabs));
   else
-    tmp_optabs = ggc_alloc<target_optabs> ();
+    tmp_optabs = ggc_cleared_alloc<target_optabs> ();
 
   /* Generate a new set of optabs into tmp_optabs.  */
   init_all_optabs (tmp_optabs);
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 7fab96f2d6659bf7681741cd72c21da33ed4cdf8..532cf9789ba2dd3652ba69ba0719de2abccbfe19 100644
@@ -384,6 +384,9 @@ OPTAB_D (atomic_xor_optab, "atomic_xor$I$a")
 OPTAB_D (get_thread_pointer_optab, "get_thread_pointer$I$a")
 OPTAB_D (set_thread_pointer_optab, "set_thread_pointer$I$a")
 
+OPTAB_D (gather_load_optab, "gather_load$a")
+OPTAB_D (mask_gather_load_optab, "mask_gather_load$a")
+
 OPTAB_DC (vec_duplicate_optab, "vec_duplicate$a", VEC_DUPLICATE)
 OPTAB_DC (vec_series_optab, "vec_series$a", VEC_SERIES)
 OPTAB_D (vec_shl_insert_optab, "vec_shl_insert_$a")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index 8c8987639e65df9a3b1b3ebe6823758208b737a2..78e5f4c5ababa4049c6c0d76e4bc89f23e83fad1 100644
@@ -1,3 +1,22 @@
+2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
+           Alan Hayward  <alan.hayward@arm.com>
+           David Sherwood  <david.sherwood@arm.com>
+
+       * gcc.target/aarch64/sve/gather_load_1.c: New test.
+       * gcc.target/aarch64/sve/gather_load_2.c: Likewise.
+       * gcc.target/aarch64/sve/gather_load_3.c: Likewise.
+       * gcc.target/aarch64/sve/gather_load_4.c: Likewise.
+       * gcc.target/aarch64/sve/gather_load_5.c: Likewise.
+       * gcc.target/aarch64/sve/gather_load_6.c: Likewise.
+       * gcc.target/aarch64/sve/gather_load_7.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_1.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_2.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_3.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_4.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_5.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_6.c: Likewise.
+       * gcc.target/aarch64/sve/mask_gather_load_7.c: Likewise.
+
 2018-01-13  Richard Sandiford  <richard.sandiford@linaro.org>
            Alan Hayward  <alan.hayward@arm.com>
            David Sherwood  <david.sherwood@arm.com>
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_1.c
new file mode 100644
index 0000000..33f1629
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size.  */
+#define TEST_LOOP(DATA_TYPE, BITS)                                     \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
+                INDEX##BITS *indices, int n)                           \
+  {                                                                    \
+    for (int i = 9; i < n; ++i)                                                \
+      dest[i] += src[indices[i]];                                      \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int32_t, 32)                              \
+  T (uint32_t, 32)                             \
+  T (float, 32)                                        \
+  T (int64_t, 64)                              \
+  T (uint64_t, 64)                             \
+  T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_2.c
new file mode 100644
index 0000000..e3fb2a9
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "gather_load_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw 2\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_3.c
new file mode 100644
index 0000000..54af507
--- /dev/null
@@ -0,0 +1,32 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+/* Invoked 18 times for each data size.  */
+#define TEST_LOOP(DATA_TYPE, BITS)                                     \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
+                INDEX##BITS *indices, int n)                           \
+  {                                                                    \
+    for (int i = 9; i < n; ++i)                                                \
+      dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]);           \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int32_t, 32)                              \
+  T (uint32_t, 32)                             \
+  T (float, 32)                                        \
+  T (int64_t, 64)                              \
+  T (uint64_t, 64)                             \
+  T (double, 64)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_4.c
new file mode 100644
index 0000000..3e2c831
--- /dev/null
@@ -0,0 +1,10 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "gather_load_3.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, uxtw\]\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_5.c
new file mode 100644
index 0000000..b22a80a
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#include <stdint.h>
+
+/* Invoked 18 times for each data size.  */
+#define TEST_LOOP(DATA_TYPE)                                           \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict *src,   \
+                int n)                                                 \
+  {                                                                    \
+    for (int i = 9; i < n; ++i)                                                \
+      dest[i] += *src[i];                                              \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int64_t)                                  \
+  T (uint64_t)                                 \
+  T (double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+.d\]\n} 3 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_6.c
new file mode 100644
index 0000000..8445be4
--- /dev/null
@@ -0,0 +1,36 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -fwrapv --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX16 int16_t
+#define INDEX32 int32_t
+#endif
+
+/* Invoked 18 times for each data size.  */
+#define TEST_LOOP(DATA_TYPE, BITS)                                     \
+  void __attribute__ ((noinline, noclone))                             \
+  f_##DATA_TYPE (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,    \
+                INDEX##BITS *indices, INDEX##BITS mask, int n)         \
+  {                                                                    \
+    for (int i = 9; i < n; ++i)                                                \
+      dest[i] = src[(INDEX##BITS) (indices[i] | mask)];                        \
+  }
+
+#define TEST_ALL(T)                            \
+  T (int32_t, 16)                              \
+  T (uint32_t, 16)                             \
+  T (float, 16)                                        \
+  T (int64_t, 32)                              \
+  T (uint64_t, 32)                             \
+  T (double, 32)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tsunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, sxtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_7.c
new file mode 100644
index 0000000..f5ae930
--- /dev/null
@@ -0,0 +1,15 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize --save-temps" } */
+
+#define INDEX16 uint16_t
+#define INDEX32 uint32_t
+
+#include "gather_load_6.c"
+
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.s, z[0-9]+\.h\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpkhi\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tuunpklo\tz[0-9]+\.d, z[0-9]+\.s\n} 3 } } */
+/* Either extension type is OK here.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+.s, [us]xtw 2\]\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+.d, lsl 3\]\n} 6 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_1.c
new file mode 100644
index 0000000..15f863f
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS)                           \
+  void                                                                 \
+  f_##DATA_TYPE##_##CMP_TYPE                                           \
+    (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,                        \
+     CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n)      \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      if (cmp1[i] == cmp2[i])                                          \
+       dest[i] += src[indices[i]];                                     \
+  }
+
+#define TEST32(T, DATA_TYPE)           \
+  T (DATA_TYPE, int32_t, 32)           \
+  T (DATA_TYPE, uint32_t, 32)          \
+  T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE)           \
+  T (DATA_TYPE, int64_t, 64)           \
+  T (DATA_TYPE, uint64_t, 64)          \
+  T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T)                    \
+  TEST32 (T, int32_t)                  \
+  TEST32 (T, uint32_t)                 \
+  TEST32 (T, float)                    \
+  TEST64 (T, int64_t)                  \
+  TEST64 (T, uint64_t)                 \
+  TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_2.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_2.c
new file mode 100644
index 0000000..9fa6009
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "mask_gather_load_1.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_3.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_3.c
new file mode 100644
index 0000000..90ad6ae
--- /dev/null
@@ -0,0 +1,52 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, BITS)                           \
+  void                                                                 \
+  f_##DATA_TYPE##_##CMP_TYPE                                           \
+    (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,                        \
+     CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX##BITS *indices, int n)      \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      if (cmp1[i] == cmp2[i])                                          \
+       dest[i] += *(DATA_TYPE *) ((char *) src + indices[i]);          \
+  }
+
+#define TEST32(T, DATA_TYPE)           \
+  T (DATA_TYPE, int32_t, 32)           \
+  T (DATA_TYPE, uint32_t, 32)          \
+  T (DATA_TYPE, float, 32)
+
+#define TEST64(T, DATA_TYPE)           \
+  T (DATA_TYPE, int64_t, 64)           \
+  T (DATA_TYPE, uint64_t, 64)          \
+  T (DATA_TYPE, double, 64)
+
+#define TEST_ALL(T)                    \
+  TEST32 (T, int32_t)                  \
+  TEST32 (T, uint32_t)                 \
+  TEST32 (T, float)                    \
+  TEST64 (T, int64_t)                  \
+  TEST64 (T, uint64_t)                 \
+  TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_4.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_4.c
new file mode 100644
index 0000000..1c8390a
--- /dev/null
@@ -0,0 +1,19 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#define INDEX32 uint32_t
+#define INDEX64 uint64_t
+
+#include "mask_gather_load_3.c"
+
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 9 } } */
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_5.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_5.c
new file mode 100644
index 0000000..60a93f1
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#ifndef INDEX32
+#define INDEX32 int32_t
+#define INDEX64 int64_t
+#endif
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE)                                 \
+  void                                                                 \
+  f_##DATA_TYPE##_##CMP_TYPE                                           \
+    (DATA_TYPE *restrict dest, DATA_TYPE *restrict *restrict src,      \
+     CMP_TYPE *cmp1, CMP_TYPE *cmp2, int n)                            \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      if (cmp1[i] == cmp2[i])                                          \
+       dest[i] += *src[i];                                             \
+  }
+
+#define TEST_TYPE(T, DATA_TYPE)                \
+  T (DATA_TYPE, int64_t)               \
+  T (DATA_TYPE, uint64_t)              \
+  T (DATA_TYPE, double)
+
+#define TEST_ALL(T)                    \
+  TEST_TYPE (T, int64_t)               \
+  TEST_TYPE (T, uint64_t)              \
+  TEST_TYPE (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[z[0-9]+\.d\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1d\tz[0-9]+\.d, p[0-7], \[x[0-9]+, x[0-9]+, lsl 3\]\n} 9 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_6.c
new file mode 100644
index 0000000..ff01431
--- /dev/null
@@ -0,0 +1,38 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE)                     \
+  void                                                                 \
+  f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE                            \
+    (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,                        \
+     CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n)       \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      if (cmp1[i] == cmp2[i])                                          \
+       dest[i] += src[indices[i]];                                     \
+  }
+
+#define TEST32(T, DATA_TYPE)                   \
+  T (DATA_TYPE, int64_t, int32_t)              \
+  T (DATA_TYPE, uint64_t, int32_t)             \
+  T (DATA_TYPE, double, int32_t)               \
+  T (DATA_TYPE, int64_t, uint32_t)             \
+  T (DATA_TYPE, uint64_t, uint32_t)            \
+  T (DATA_TYPE, double, uint32_t)
+
+#define TEST_ALL(T)                    \
+  TEST32 (T, int32_t)                  \
+  TEST32 (T, uint32_t)                 \
+  TEST32 (T, float)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 3\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 24 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 9 } } */
+/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.s, p[0-7], \[x[0-9]+, x[0-9]+, lsl 2\]\n} 18 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_gather_load_7.c
new file mode 100644
index 0000000..cd2661e
--- /dev/null
@@ -0,0 +1,53 @@
+/* { dg-do assemble { target aarch64_asm_sve_ok } } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math --save-temps" } */
+
+#include <stdint.h>
+
+#define TEST_LOOP(DATA_TYPE, CMP_TYPE, INDEX_TYPE)                     \
+  void                                                                 \
+  f_##DATA_TYPE##_##CMP_TYPE##_##INDEX_TYPE                            \
+    (DATA_TYPE *restrict dest, DATA_TYPE *restrict src,                        \
+     CMP_TYPE *cmp1, CMP_TYPE *cmp2, INDEX_TYPE *indices, int n)       \
+  {                                                                    \
+    for (int i = 0; i < n; ++i)                                                \
+      if (cmp1[i] == cmp2[i])                                          \
+       dest[i] += src[indices[i]];                                     \
+  }
+
+#define TEST32(T, DATA_TYPE)                   \
+  T (DATA_TYPE, int16_t, int32_t)              \
+  T (DATA_TYPE, uint16_t, int32_t)             \
+  T (DATA_TYPE, _Float16, int32_t)             \
+  T (DATA_TYPE, int16_t, uint32_t)             \
+  T (DATA_TYPE, uint16_t, uint32_t)            \
+  T (DATA_TYPE, _Float16, uint32_t)
+
+#define TEST64(T, DATA_TYPE)                   \
+  T (DATA_TYPE, int32_t, int64_t)              \
+  T (DATA_TYPE, uint32_t, int64_t)             \
+  T (DATA_TYPE, float, int64_t)                        \
+  T (DATA_TYPE, int32_t, uint64_t)             \
+  T (DATA_TYPE, uint32_t, uint64_t)            \
+  T (DATA_TYPE, float, uint64_t)
+
+#define TEST_ALL(T)                    \
+  TEST32 (T, int32_t)                  \
+  TEST32 (T, uint32_t)                 \
+  TEST32 (T, float)                    \
+  TEST64 (T, int64_t)                  \
+  TEST64 (T, uint64_t)                 \
+  TEST64 (T, double)
+
+TEST_ALL (TEST_LOOP)
+
+/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 1\]\n} 36 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.h, p[0-7]/z, z[0-9]+\.h, z[0-9]+\.h\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, sxtw 2\]\n} 18 } } */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, z[0-9]+\.s, uxtw 2\]\n} 18 } } */
+
+/* Also used for the TEST32 indices.  */
+/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s, p[0-7]/z, \[x[0-9]+, x[0-9]+, lsl 2\]\n} 72 } } */
+/* { dg-final { scan-assembler-times {\tcmpeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 12 } } */
+/* { dg-final { scan-assembler-times {\tfcmeq\tp[0-7]\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d, p[0-7]/z, \[x[0-9]+, z[0-9]+\.d, lsl 3\]\n} 36 } } */
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 59462be4295e4b55f2a4881e9cdea8b27742d034..f79be863194cb02038691fba5f6f707d1494d366 100644
@@ -53,6 +53,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-cfg.h"
 #include "tree-hash-traits.h"
 #include "vec-perm-indices.h"
+#include "internal-fn.h"
 
 /* Return true if load- or store-lanes optab OPTAB is implemented for
    COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */
@@ -3300,6 +3301,74 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
   return true;
 }
 
+/* Check whether we can use an internal function for a gather load
+   or scatter store.  READ_P is true for loads and false for stores.
+   MASKED_P is true if the load or store is conditional.  MEMORY_TYPE is
+   the type of the memory elements being loaded or stored.  OFFSET_BITS
+   is the number of bits in each scalar offset and OFFSET_SIGN is the
+   sign of the offset.  SCALE is the amount by which the offset should
+   be multiplied *after* it has been converted to address width.
+
+   Return true if the function is supported, storing the function
+   id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT.  */
+
+static bool
+vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
+                         tree memory_type, unsigned int offset_bits,
+                         signop offset_sign, int scale,
+                         internal_fn *ifn_out, tree *element_type_out)
+{
+  unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
+  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
+  if (offset_bits > element_bits)
+    /* Internal functions require the offset to be the same width as
+       the vector elements.  We can extend narrower offsets, but it isn't
+       safe to truncate wider offsets.  */
+    return false;
+
+  if (element_bits != memory_bits)
+    /* For now the vector elements must be the same width as the
+       memory elements.  */
+    return false;
+
+  /* Work out which function we need.  */
+  internal_fn ifn;
+  if (read_p)
+    ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
+  else
+    return false;
+
+  /* Test whether the target supports this combination.  */
+  if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
+                                              offset_sign, scale))
+    return false;
+
+  *ifn_out = ifn;
+  *element_type_out = TREE_TYPE (vectype);
+  return true;
+}
+
+/* CALL is a call to an internal gather load or scatter store function.
+   Describe the operation in INFO.  */
+
+static void
+vect_describe_gather_scatter_call (gcall *call, gather_scatter_info *info)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (call);
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+
+  info->ifn = gimple_call_internal_fn (call);
+  info->decl = NULL_TREE;
+  info->base = gimple_call_arg (call, 0);
+  info->offset = gimple_call_arg (call, 1);
+  info->offset_dt = vect_unknown_def_type;
+  info->offset_vectype = NULL_TREE;
+  info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
+  info->element_type = TREE_TYPE (vectype);
+  info->memory_type = TREE_TYPE (DR_REF (dr));
+}
+
 /* Return true if a non-affine read or write in STMT is suitable for a
    gather load or scatter store.  Describe the operation in *INFO if so.  */
 
@@ -3313,17 +3382,38 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
   tree offtype = NULL_TREE;
-  tree decl, base, off;
+  tree decl = NULL_TREE, base, off;
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  tree memory_type = TREE_TYPE (DR_REF (dr));
   machine_mode pmode;
   int punsignedp, reversep, pvolatilep = 0;
+  internal_fn ifn;
+  tree element_type;
+  bool masked_p = false;
+
+  /* See whether this is already a call to a gather/scatter internal function.
+     If not, see whether it's a masked load or store.  */
+  gcall *call = dyn_cast <gcall *> (stmt);
+  if (call && gimple_call_internal_p (call))
+    {
+      ifn = gimple_call_internal_fn (stmt);
+      if (internal_gather_scatter_fn_p (ifn))
+       {
+         vect_describe_gather_scatter_call (call, info);
+         return true;
+       }
+      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
+    }
+
+  /* True if we should aim to use internal functions rather than
+     built-in functions.  */
+  bool use_ifn_p = (DR_IS_READ (dr)
+                   && supports_vec_gather_load_p ());
 
   base = DR_REF (dr);
   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
      see if we can use the def stmt of the address.  */
-  if (is_gimple_call (stmt)
-      && gimple_call_internal_p (stmt)
-      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
-         || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
+  if (masked_p
       && TREE_CODE (base) == MEM_REF
       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
       && integer_zerop (TREE_OPERAND (base, 1))
@@ -3454,7 +3544,17 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
        case MULT_EXPR:
          if (scale == 1 && tree_fits_shwi_p (op1))
            {
-             scale = tree_to_shwi (op1);
+             int new_scale = tree_to_shwi (op1);
+             /* Only treat this as a scaling operation if the target
+                supports it.  */
+             if (use_ifn_p
+                 && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
+                                               vectype, memory_type, 1,
+                                               TYPE_SIGN (TREE_TYPE (op0)),
+                                               new_scale, &ifn,
+                                               &element_type))
+               break;
+             scale = new_scale;
              off = op0;
              continue;
            }
@@ -3472,6 +3572,15 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
              off = op0;
              continue;
            }
+
+         /* The internal functions need the offset to be the same width
+            as the elements of VECTYPE.  Don't include operations that
+            cast the offset from that width to a different width.  */
+         if (use_ifn_p
+             && (int_size_in_bytes (TREE_TYPE (vectype))
+                 == int_size_in_bytes (TREE_TYPE (off))))
+           break;
+
          if (TYPE_PRECISION (TREE_TYPE (op0))
              < TYPE_PRECISION (TREE_TYPE (off)))
            {
@@ -3496,22 +3605,37 @@ vect_check_gather_scatter (gimple *stmt, loop_vec_info loop_vinfo,
   if (offtype == NULL_TREE)
     offtype = TREE_TYPE (off);
 
-  if (DR_IS_READ (dr))
-    decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
-                                            offtype, scale);
+  if (use_ifn_p)
+    {
+      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
+                                    memory_type, TYPE_PRECISION (offtype),
+                                    TYPE_SIGN (offtype), scale, &ifn,
+                                    &element_type))
+       return false;
+    }
   else
-    decl = targetm.vectorize.builtin_scatter (STMT_VINFO_VECTYPE (stmt_info),
-                                             offtype, scale);
+    {
+      if (DR_IS_READ (dr))
+       decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
+      else
+       decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
 
-  if (decl == NULL_TREE)
-    return false;
+      if (!decl)
+       return false;
+
+      ifn = IFN_LAST;
+      element_type = TREE_TYPE (vectype);
+    }
 
+  info->ifn = ifn;
   info->decl = decl;
   info->base = base;
   info->offset = off;
   info->offset_dt = vect_unknown_def_type;
   info->offset_vectype = NULL_TREE;
   info->scale = scale;
+  info->element_type = element_type;
+  info->memory_type = memory_type;
   return true;
 }
 
@@ -3592,7 +3716,8 @@ again:
          bool maybe_gather
            = DR_IS_READ (dr)
              && !TREE_THIS_VOLATILE (DR_REF (dr))
-             && targetm.vectorize.builtin_gather != NULL;
+             && (targetm.vectorize.builtin_gather != NULL
+                 || supports_vec_gather_load_p ());
          bool maybe_scatter
            = DR_IS_WRITE (dr)
              && !TREE_THIS_VOLATILE (DR_REF (dr))
index 5124c110b240d91d968c91ab785f79a33bc3d135..f4b1b3e1ce92f82f0cbc4222357821cbd54d5dc9 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -69,6 +69,8 @@ static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
                                                  tree *, tree *);
 static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
 static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
+static gimple *vect_recog_gather_scatter_pattern (vec<gimple *> *, tree *,
+                                                 tree *);
 
 struct vect_recog_func
 {
@@ -93,6 +95,10 @@ static vect_recog_func vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
       {        vect_recog_mult_pattern, "mult" },
       {        vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
       {        vect_recog_bool_pattern, "bool" },
+      /* This must come before mask conversion, and includes the parts
+        of mask conversion that are needed for gather and scatter
+        internal functions.  */
+      { vect_recog_gather_scatter_pattern, "gather_scatter" },
       {        vect_recog_mask_conversion_pattern, "mask_conversion" }
 };
 
@@ -4117,6 +4123,203 @@ vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
   return pattern_stmt;
 }
 
+/* STMT is a load or store.  If the load or store is conditional, return
+   the boolean condition under which it occurs, otherwise return null.  */
+
+static tree
+vect_get_load_store_mask (gimple *stmt)
+{
+  if (gassign *def_assign = dyn_cast <gassign *> (stmt))
+    {
+      gcc_assert (gimple_assign_single_p (def_assign));
+      return NULL_TREE;
+    }
+
+  if (gcall *def_call = dyn_cast <gcall *> (stmt))
+    {
+      internal_fn ifn = gimple_call_internal_fn (def_call);
+      int mask_index = internal_fn_mask_index (ifn);
+      return gimple_call_arg (def_call, mask_index);
+    }
+
+  gcc_unreachable ();
+}
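
So a plain assignment yields a null mask, while for e.g. an IFN_MASK_LOAD call the mask comes back via internal_fn_mask_index; that is argument 2, assuming the usual (pointer, alignment, mask) operand order that the old switch statement this patch removes from exist_non_indexing_operands_for_use_p also relied on.  A hypothetical sketch:

     _1 = MASK_LOAD (ptr_2, align_3, mask_4);   /* returns mask_4    */
     _5 = *ptr_6;                               /* returns NULL_TREE */
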
+
+/* Return the scalar offset type that an internal gather/scatter function
+   should use.  GS_INFO describes the gather/scatter operation.  */
+
+static tree
+vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
+{
+  tree offset_type = TREE_TYPE (gs_info->offset);
+  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
+
+  /* Enforced by vect_check_gather_scatter.  */
+  unsigned int offset_bits = TYPE_PRECISION (offset_type);
+  gcc_assert (element_bits >= offset_bits);
+
+  /* If the offset is narrower than the elements, extend it according
+     to its sign.  */
+  if (element_bits > offset_bits)
+    return build_nonstandard_integer_type (element_bits,
+                                          TYPE_UNSIGNED (offset_type));
+
+  return offset_type;
+}
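
For instance, a uint16_t offset feeding a gather of 64-bit elements comes back as build_nonstandard_integer_type (64, 1), a 64-bit unsigned type, so that the conversion added later zero-extends exactly as the original narrow offset would have.
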
+
+/* Return MASK if MASK is suitable for masking an operation on vectors
+   of type VECTYPE, otherwise convert it into such a form and return
+   the result.  Associate any conversion statements with STMT_INFO's
+   pattern.  */
+
+static tree
+vect_convert_mask_for_vectype (tree mask, tree vectype,
+                              stmt_vec_info stmt_info, vec_info *vinfo)
+{
+  tree mask_type = search_type_for_mask (mask, vinfo);
+  if (mask_type)
+    {
+      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
+      if (mask_vectype
+         && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
+                      TYPE_VECTOR_SUBPARTS (mask_vectype)))
+       mask = build_mask_conversion (mask, vectype, stmt_info, vinfo);
+    }
+  return mask;
+}
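
For example, with hypothetical fixed-width types: a mask computed by comparing 32-bit ints has four elements per 128-bit vector, while a gather of 64-bit doubles needs a two-element mask, so build_mask_conversion has to bridge the mismatch; a mask whose vector type already has the right number of subparts is returned unchanged.
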
+
+/* Return the equivalent of:
+
+     fold_convert (TYPE, VALUE)
+
+   with the expectation that the operation will be vectorized.
+   If new statements are needed, add them as pattern statements
+   to STMT_INFO.  */
+
+static tree
+vect_add_conversion_to_patterm (tree type, tree value,
+                               stmt_vec_info stmt_info,
+                               vec_info *vinfo)
+{
+  if (useless_type_conversion_p (type, TREE_TYPE (value)))
+    return value;
+
+  tree new_value = vect_recog_temp_ssa_var (type, NULL);
+  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
+  stmt_vec_info new_stmt_info = new_stmt_vec_info (conversion, vinfo);
+  set_vinfo_for_stmt (conversion, new_stmt_info);
+  STMT_VINFO_VECTYPE (new_stmt_info) = get_vectype_for_scalar_type (type);
+  append_pattern_def_seq (stmt_info, conversion);
+  return new_value;
+}
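
A sketch of its effect, with hypothetical SSA names: widening a 32-bit idx_1 to 64 bits appends something like

     patt_idx_2 = (long int) idx_1;

to STMT_INFO's pattern definition sequence and returns patt_idx_2, while a conversion that useless_type_conversion_p deems unnecessary simply returns the input value.
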
+
+/* Try to convert STMT into a call to a gather load or scatter store
+   internal function.  Return the final statement on success and set
+   *TYPE_IN and *TYPE_OUT to the vector type being loaded or stored.
+
+   This function only handles gathers and scatters that were recognized
+   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */
+
+static gimple *
+vect_try_gather_scatter_pattern (gimple *stmt, stmt_vec_info last_stmt_info,
+                                tree *type_in, tree *type_out)
+{
+  /* Currently we only support this for loop vectorization.  */
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
+  if (!loop_vinfo)
+    return NULL;
+
+  /* Make sure that we're looking at a gather load or scatter store.  */
+  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
+  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+    return NULL;
+
+  /* Reject stores for now.  */
+  if (!DR_IS_READ (dr))
+    return NULL;
+
+  /* Get the boolean that controls whether the load or store happens.
+     This is null if the operation is unconditional.  */
+  tree mask = vect_get_load_store_mask (stmt);
+
+  /* Make sure that the target supports an appropriate internal
+     function for the gather/scatter operation.  */
+  gather_scatter_info gs_info;
+  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
+      || gs_info.decl)
+    return NULL;
+
+  /* Convert the mask to the right form.  */
+  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
+  if (mask)
+    mask = vect_convert_mask_for_vectype (mask, gs_vectype, last_stmt_info,
+                                         loop_vinfo);
+
+  /* Get the invariant base and non-invariant offset, converting the
+     latter to the same width as the vector elements.  */
+  tree base = gs_info.base;
+  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
+  tree offset = vect_add_conversion_to_patterm (offset_type, gs_info.offset,
+                                               last_stmt_info, loop_vinfo);
+
+  /* Build the new pattern statement.  */
+  tree scale = size_int (gs_info.scale);
+  gcall *pattern_stmt;
+  if (DR_IS_READ (dr))
+    {
+      if (mask != NULL)
+       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
+                                                  offset, scale, mask);
+      else
+       pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
+                                                  offset, scale);
+      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
+      gimple_call_set_lhs (pattern_stmt, load_lhs);
+    }
+  else
+    /* Not yet supported.  */
+    gcc_unreachable ();
+  gimple_call_set_nothrow (pattern_stmt, true);
+
+  /* Copy across relevant vectorization info and associate DR with the
+     new pattern statement instead of the original statement.  */
+  stmt_vec_info pattern_stmt_info = new_stmt_vec_info (pattern_stmt,
+                                                      loop_vinfo);
+  set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
+  STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
+  STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
+    = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
+  STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
+    = STMT_VINFO_GATHER_SCATTER_P (stmt_info);
+  DR_STMT (dr) = pattern_stmt;
+
+  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+  *type_out = vectype;
+  *type_in = vectype;
+
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+                    "gather/scatter pattern detected:\n");
+
+  return pattern_stmt;
+}
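
Putting the pieces together, a sketch of the whole recognizer on a conditional gather, with hypothetical source and SSA names.  Scalar input such as

     for (int i = 0; i < n; ++i)
       if (pred[i])
         out[i] = in[idx[i]];

reaches this code as an IFN_MASK_LOAD whose address is gather-shaped; the pattern replaces it with something along the lines of

     patt_3 = MASK_GATHER_LOAD (in_ptr_4, idx_ext_2, 8, mask_1);

where idx_ext_2 is the offset widened by vect_add_conversion_to_patterm and mask_1 is the mask after vect_convert_mask_for_vectype.
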
+
+/* Pattern wrapper around vect_try_gather_scatter_pattern.  */
+
+static gimple *
+vect_recog_gather_scatter_pattern (vec<gimple *> *stmts, tree *type_in,
+                                  tree *type_out)
+{
+  gimple *last_stmt = stmts->pop ();
+  stmt_vec_info last_stmt_info = vinfo_for_stmt (last_stmt);
+  gimple *pattern_stmt = vect_try_gather_scatter_pattern (last_stmt,
+                                                         last_stmt_info,
+                                                         type_in, type_out);
+  if (pattern_stmt)
+    stmts->safe_push (last_stmt);
+  return pattern_stmt;
+}
 
 /* Mark statements that are involved in a pattern.  */
 
index e4d20514c00db75a299f366bb77076b5e3b9d197..a308d801082c1ece2d4cf645326eec78297f73f8 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -391,21 +391,19 @@ exist_non_indexing_operands_for_use_p (tree use, gimple *stmt)
     {
       if (is_gimple_call (stmt)
          && gimple_call_internal_p (stmt))
-       switch (gimple_call_internal_fn (stmt))
-         {
-         case IFN_MASK_STORE:
-           operand = gimple_call_arg (stmt, 3);
-           if (operand == use)
-             return true;
-           /* FALLTHRU */
-         case IFN_MASK_LOAD:
-           operand = gimple_call_arg (stmt, 2);
-           if (operand == use)
-             return true;
-           break;
-         default:
-           break;
-         }
+       {
+         internal_fn ifn = gimple_call_internal_fn (stmt);
+         int mask_index = internal_fn_mask_index (ifn);
+         if (mask_index >= 0
+             && use == gimple_call_arg (stmt, mask_index))
+           return true;
+         if (internal_gather_scatter_fn_p (ifn)
+             && use == gimple_call_arg (stmt, 1))
+           return true;
+         if (ifn == IFN_MASK_STORE
+             && use == gimple_call_arg (stmt, 3))
+           return true;
+       }
       return false;
     }
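
In the rewritten form, a call such as MASK_GATHER_LOAD (base, offset, scale, mask) answers true for a use that is its mask or its offset (argument 1), and IFN_MASK_STORE additionally for the stored value (argument 3); any other use falls through to the final return false.
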
 
@@ -1727,6 +1725,8 @@ static tree permute_vec_elements (tree, tree, tree, gimple *,
    is the type of the vector being loaded or stored.  MEMORY_ACCESS_TYPE
    says how the load or store is going to be implemented and GROUP_SIZE
    is the number of load or store statements in the containing group.
+   If the access is a gather load or scatter store, GS_INFO describes
+   its arguments.
 
    Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not
    supported, otherwise record the required mask types.  */
@@ -1734,7 +1734,8 @@ static tree permute_vec_elements (tree, tree, tree, gimple *,
 static void
 check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
                          vec_load_store_type vls_type, int group_size,
-                         vect_memory_access_type memory_access_type)
+                         vect_memory_access_type memory_access_type,
+                         gather_scatter_info *gs_info)
 {
   /* Invariant loads need no special support.  */
   if (memory_access_type == VMAT_INVARIANT)
@@ -1762,6 +1763,29 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype,
       return;
     }
 
+  if (memory_access_type == VMAT_GATHER_SCATTER)
+    {
+      gcc_assert (is_load);
+      tree offset_type = TREE_TYPE (gs_info->offset);
+      if (!internal_gather_scatter_fn_supported_p (IFN_MASK_GATHER_LOAD,
+                                                  vectype,
+                                                  gs_info->memory_type,
+                                                  TYPE_SIGN (offset_type),
+                                                  gs_info->scale))
+       {
+         if (dump_enabled_p ())
+           dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+                            "can't use a fully-masked loop because the"
+                            " target doesn't have an appropriate masked"
+                            " gather load instruction.\n");
+         LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
+         return;
+       }
+      unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype);
+      vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype);
+      return;
+    }
+
   if (memory_access_type != VMAT_CONTIGUOUS
       && memory_access_type != VMAT_CONTIGUOUS_PERMUTE)
     {
@@ -2563,6 +2587,31 @@ vect_build_gather_load_calls (gimple *stmt, gimple_stmt_iterator *gsi,
     }
 }
 
+/* Prepare the base and offset in GS_INFO for vectorization.
+   Set *DATAREF_PTR to the loop-invariant base address and *VEC_OFFSET
+   to the vectorized offset argument for the first copy of STMT.  STMT
+   is the statement described by GS_INFO and LOOP is the containing loop.  */
+
+static void
+vect_get_gather_scatter_ops (struct loop *loop, gimple *stmt,
+                            gather_scatter_info *gs_info,
+                            tree *dataref_ptr, tree *vec_offset)
+{
+  gimple_seq stmts = NULL;
+  *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE);
+  if (stmts != NULL)
+    {
+      basic_block new_bb;
+      edge pe = loop_preheader_edge (loop);
+      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
+      gcc_assert (!new_bb);
+    }
+  tree offset_type = TREE_TYPE (gs_info->offset);
+  tree offset_vectype = get_vectype_for_scalar_type (offset_type);
+  *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt,
+                                             offset_vectype);
+}
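
The resulting division of labour, as a hypothetical GIMPLE sketch: the invariant base is gimplified once on the preheader edge, while the offset gets a fresh vector definition for each copy inside the loop:

     /* loop preheader */
     base_1 = &in[0];
     /* loop body; vect_idx_2 is the vectorized offset */
     _3 = GATHER_LOAD (base_1, vect_idx_2, 8);
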
+
 /* Check and perform vectorization of BUILT_IN_BSWAP{16,32,64}.  */
 
 static bool
@@ -2751,7 +2800,7 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     return false;
 
   if (gimple_call_internal_p (stmt)
-      && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
+      && (internal_load_fn_p (gimple_call_internal_fn (stmt))
          || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
     /* Handled by vectorizable_load and vectorizable_store.  */
     return false;
@@ -5951,7 +6000,7 @@ vectorizable_store (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (loop_vinfo
          && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, vls_type, group_size,
-                                 memory_access_type);
+                                 memory_access_type, &gs_info);
 
       STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
       /* The SLP costs are calculated during SLP analysis.  */
@@ -6932,7 +6981,11 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
   else
     {
       gcall *call = dyn_cast <gcall *> (stmt);
-      if (!call || !gimple_call_internal_p (call, IFN_MASK_LOAD))
+      if (!call || !gimple_call_internal_p (call))
+       return false;
+
+      internal_fn ifn = gimple_call_internal_fn (call);
+      if (!internal_load_fn_p (ifn))
        return false;
 
       scalar_dest = gimple_call_lhs (call);
@@ -6947,9 +7000,13 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          return false;
        }
 
-      mask = gimple_call_arg (call, 2);
-      if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
-       return false;
+      int mask_index = internal_fn_mask_index (ifn);
+      if (mask_index >= 0)
+       {
+         mask = gimple_call_arg (call, mask_index);
+         if (!vect_check_load_store_mask (stmt, mask, &mask_vectype))
+           return false;
+       }
     }
 
   if (!STMT_VINFO_DATA_REF (stmt_info))
@@ -7073,7 +7130,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                                             TYPE_MODE (mask_vectype), true))
            return false;
        }
-      else if (memory_access_type == VMAT_GATHER_SCATTER)
+      else if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
        {
          tree arglist = TYPE_ARG_TYPES (TREE_TYPE (gs_info.decl));
          tree masktype
@@ -7087,7 +7144,8 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              return false;
            }
        }
-      else if (memory_access_type != VMAT_LOAD_STORE_LANES)
+      else if (memory_access_type != VMAT_LOAD_STORE_LANES
+              && memory_access_type != VMAT_GATHER_SCATTER)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
@@ -7104,7 +7162,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
       if (loop_vinfo
          && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
        check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size,
-                                 memory_access_type);
+                                 memory_access_type, &gs_info);
 
       STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
       /* The SLP costs are calculated during SLP analysis.  */
@@ -7126,7 +7184,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
 
   ensure_base_align (dr);
 
-  if (memory_access_type == VMAT_GATHER_SCATTER)
+  if (memory_access_type == VMAT_GATHER_SCATTER && gs_info.decl)
     {
       vect_build_gather_load_calls (stmt, gsi, vec_stmt, &gs_info, mask);
       return true;
@@ -7571,6 +7629,7 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
     aggr_type = vectype;
 
   tree vec_mask = NULL_TREE;
+  tree vec_offset = NULL_TREE;
   prev_stmt_info = NULL;
   poly_uint64 group_elt = 0;
   vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
@@ -7613,6 +7672,12 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
              dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi,
                                             stmt, diff);
            }
+         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+           {
+             vect_get_gather_scatter_ops (loop, stmt, &gs_info,
+                                          &dataref_ptr, &vec_offset);
+             inv_p = false;
+           }
          else
            dataref_ptr
              = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
@@ -7628,6 +7693,13 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
          if (dataref_offset)
            dataref_offset = int_const_binop (PLUS_EXPR, dataref_offset,
                                              TYPE_SIZE_UNIT (aggr_type));
+         else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+           {
+             gimple *def_stmt;
+             vect_def_type dt;
+             vect_is_simple_use (vec_offset, loop_vinfo, &def_stmt, &dt);
+             vec_offset = vect_get_vec_def_for_stmt_copy (dt, vec_offset);
+           }
          else
            dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                           TYPE_SIZE_UNIT (aggr_type));
@@ -7716,6 +7788,24 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
                  {
                    unsigned int align, misalign;
 
+                   if (memory_access_type == VMAT_GATHER_SCATTER)
+                     {
+                       tree scale = size_int (gs_info.scale);
+                       gcall *call;
+                       if (masked_loop_p)
+                         call = gimple_build_call_internal
+                           (IFN_MASK_GATHER_LOAD, 4, dataref_ptr,
+                            vec_offset, scale, final_mask);
+                       else
+                         call = gimple_build_call_internal
+                           (IFN_GATHER_LOAD, 3, dataref_ptr,
+                            vec_offset, scale);
+                       gimple_call_set_nothrow (call, true);
+                       new_stmt = call;
+                       data_ref = NULL_TREE;
+                       break;
+                     }
+
                    align = DR_TARGET_ALIGNMENT (dr);
                    if (alignment_support_scheme == dr_aligned)
                      {
index 003d4accedb6b7e0bcdb0349fba29a75c654f2c2..a9ccdfd11367ba6fe2f6725ff82aadd288604c63 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -844,7 +844,12 @@ typedef struct _stmt_vec_info {
 
 /* Information about a gather/scatter call.  */
 struct gather_scatter_info {
-  /* The FUNCTION_DECL for the built-in gather/scatter function.  */
+  /* The internal function to use for the gather/scatter operation,
+     or IFN_LAST if a built-in function should be used instead.  */
+  internal_fn ifn;
+
+  /* The FUNCTION_DECL for the built-in gather/scatter function,
+     or null if an internal function should be used instead.  */
   tree decl;
 
   /* The loop-invariant base value.  */
@@ -862,6 +867,12 @@ struct gather_scatter_info {
 
   /* The type of the vectorized offset.  */
   tree offset_vectype;
+
+  /* The type of the scalar elements once loaded into vector registers,
+     or before being stored from them.  */
+  tree element_type;
+
+  /* The type of the scalar elements as they are laid out in memory.  */
+  tree memory_type;
 };
 
 /* Access Functions.  */
@@ -1533,7 +1544,7 @@ extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>,
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *);
-#define NUM_PATTERNS 14
+#define NUM_PATTERNS 15
 void vect_pattern_recog (vec_info *);
 
 /* In tree-vectorizer.c.  */