2016-04-25 Michael Collison <michael.collison@linaro.org>

author collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)

committer collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>

Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)
author collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)
committer collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 8e7059e0e0efba737e4b0744292caa757ad7ffda..4b7607a07a7d35e75d2cac1bedf0a6d71cc8392c 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,25 @@
+2016-04-25  Michael Collison  <michael.collison@linaro.org>
+
+    * config/arm/neon.md (widen_<us>sum<mode>): New patterns where
+    mode is VQI to improve mixed mode vectorization.
+    * config/arm/neon.md (vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3): New
+    define_insn to match low half of signed vaddw.
+    * config/arm/neon.md (vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3): New
+    define_insn to match high half of signed vaddw.
+    * config/arm/neon.md (vec_sel_widen_usum_lo<VQI:mode><VW:mode>3): New
+    define_insn to match low half of unsigned vaddw.
+    * config/arm/neon.md (vec_sel_widen_usum_hi<VQI:mode><VW:mode>3): New
+    define_insn to match high half of unsigned vaddw.
+    * config/arm/arm.c (arm_simd_vect_par_cnst_half): New function.
+    (arm_simd_check_vect_par_cnst_half_p): Likewise.
+    * config/arm/arm-protos.h (arm_simd_vect_par_cnst_half): Prototype
+    for new function.
+    (arm_simd_check_vect_par_cnst_half_p): Likewise.
+    * config/arm/predicates.md (vect_par_constant_high): Support
+    big endian and simplify by calling
+    arm_simd_check_vect_par_cnst_half_p.
+    (vect_par_constant_low): Likewise.
+       
  2016-04-25  Uros Bizjak  <ubizjak@gmail.com>
  
         * config/i386/i386.md (*lea<mode>_general_4): Use const_0_to_3_operand
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h

index 0083673b161a49e19388c72d3a413aeb481dbfa3..d8179c441bb53dced94d2ebf497aad093e4ac600 100644 (file)
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -50,7 +50,9 @@ extern tree arm_builtin_decl (unsigned code, bool initialize_p
                               ATTRIBUTE_UNUSED);
  extern void arm_init_builtins (void);
  extern void arm_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update);
-
+extern rtx arm_simd_vect_par_cnst_half (machine_mode mode, bool high);
+extern bool arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
+                                                bool high);
  #ifdef RTX_CODE
  extern bool arm_vector_mode_supported_p (machine_mode);
  extern bool arm_small_register_classes_for_mode_p (machine_mode);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c

index 4d9f6f1ad7efaedcecfff2361167ec842d18dff0..71b51439dc7ba5be67671e9fb4c3f18040cce58f 100644 (file)
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30302,4 +30302,80 @@ arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
    return;
  }
  
+
+/* Construct and return a PARALLEL RTX vector with elements numbering the
+   lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
+   the vector - from the perspective of the architecture.  This does not
+   line up with GCC's perspective on lane numbers, so we end up with
+   different masks depending on our target endian-ness.  The diagram
+   below may help.  We must draw the distinction when building masks
+   which select one half of the vector.  An instruction selecting
+   architectural low-lanes for a big-endian target, must be described using
+   a mask selecting GCC high-lanes.
+
+                 Big-Endian             Little-Endian
+
+GCC             0   1   2   3           3   2   1   0
+              | x | x | x | x |       | x | x | x | x |
+Architecture    3   2   1   0           3   2   1   0
+
+Low Mask:         { 2, 3 }                { 0, 1 }
+High Mask:        { 0, 1 }                { 2, 3 }
+*/
+
+rtx
+arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
+{
+  /* Each half holds NUNITS / 2 lanes; the halves start at GCC lane 0
+     and at GCC lane NUNITS / 2 respectively.  */
+  const int half = GET_MODE_NUNITS (mode) / 2;
+  rtvec lanes = rtvec_alloc (half);
+  int first;
+  int lane;
+
+  /* On big-endian the architectural high half is GCC's low half, and
+     vice versa, so the choice of starting lane is mirrored.  */
+  if (BYTES_BIG_ENDIAN)
+    first = high ? 0 : half;
+  else
+    first = high ? half : 0;
+
+  for (lane = 0; lane < half; lane++)
+    RTVEC_ELT (lanes, lane) = GEN_INT (first + lane);
+
+  return gen_rtx_PARALLEL (mode, lanes);
+}
+
+/* Check OP for validity as a PARALLEL RTX vector with elements
+   numbering the lanes of either the high (HIGH == TRUE) or low lanes,
+   from the perspective of the architecture.  See the diagram above
+   arm_simd_vect_par_cnst_half for more details.  */
+
+bool
+arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
+                                      bool high)
+{
+  /* Reject non-vector modes before building the reference mask.  */
+  if (!VECTOR_MODE_P (mode))
+    return false;
+
+  rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
+  HOST_WIDE_INT count_op = XVECLEN (op, 0);
+  HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
+
+  if (count_op != count_ideal)
+    return false;
+
+  for (int i = 0; i < count_ideal; i++)
+    {
+      rtx elt_op = XVECEXP (op, 0, i);
+      rtx elt_ideal = XVECEXP (ideal, 0, i);
+
+      if (!CONST_INT_P (elt_op)
+         || INTVAL (elt_ideal) != INTVAL (elt_op))
+       return false;
+    }
+  return true;
+}
+
  #include "gt-arm.h"
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md

index 879c07c13b6aa20c46828d08f5a4f413a5722eca..6b4896de61fc76844ac00b05feb42fa857bba4ca 100644 (file)
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1204,16 +1204,133 @@
  
  ;; Widening operations
  
+(define_expand "widen_ssum<mode>3"
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+       (plus:<V_double_width>
+        (sign_extend:<V_double_width>
+         (match_operand:VQI 1 "s_register_operand" ""))
+        (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+  "TARGET_NEON"
+  {
+    machine_mode mode = GET_MODE (operands[1]);
+    rtx p1, p2;
+
+    p1  = arm_simd_vect_par_cnst_half (mode, false);
+    p2  = arm_simd_vect_par_cnst_half (mode, true);
+
+    if (operands[0] != operands[2])
+      emit_move_insn (operands[0], operands[2]);
+
+    emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
+                                                        operands[1],
+                                                        p1,
+                                                        operands[0]));
+    emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
+                                                        operands[1],
+                                                        p2,
+                                                        operands[0]));
+    DONE;
+  }
+)
+
+(define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+       (plus:<VW:V_widen>
+        (sign_extend:<VW:V_widen>
+         (vec_select:VW
+          (match_operand:VQI 1 "s_register_operand" "%w")
+          (match_operand:VQI 2 "vect_par_constant_low" "")))
+        (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
+    "vaddw.<V_s_elem>\t%q0, %q3, %e1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+       (plus:<VW:V_widen>
+        (sign_extend:<VW:V_widen>
+         (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+                        (match_operand:VQI 2 "vect_par_constant_high" "")))
+        (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
+    "vaddw.<V_s_elem>\t%q0, %q3, %f1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
  (define_insn "widen_ssum<mode>3"
    [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
-       (plus:<V_widen> (sign_extend:<V_widen>
-                         (match_operand:VW 1 "s_register_operand" "%w"))
-                       (match_operand:<V_widen> 2 "s_register_operand" "w")))]
+       (plus:<V_widen>
+        (sign_extend:<V_widen>
+         (match_operand:VW 1 "s_register_operand" "%w"))
+        (match_operand:<V_widen> 2 "s_register_operand" "w")))]
    "TARGET_NEON"
    "vaddw.<V_s_elem>\t%q0, %q2, %P1"
    [(set_attr "type" "neon_add_widen")]
  )
  
+(define_expand "widen_usum<mode>3"
+  [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
+       (plus:<V_double_width>
+        (zero_extend:<V_double_width>
+         (match_operand:VQI 1 "s_register_operand" ""))
+        (match_operand:<V_double_width> 2 "s_register_operand" "")))]
+  "TARGET_NEON"
+  {
+    machine_mode mode = GET_MODE (operands[1]);
+    rtx p1, p2;
+
+    p1  = arm_simd_vect_par_cnst_half (mode, false);
+    p2  = arm_simd_vect_par_cnst_half (mode, true);
+
+    if (operands[0] != operands[2])
+      emit_move_insn (operands[0], operands[2]);
+
+    emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
+                                                        operands[1],
+                                                        p1,
+                                                        operands[0]));
+    emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
+                                                        operands[1],
+                                                        p2,
+                                                        operands[0]));
+    DONE;
+  }
+)
+
+(define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+       (plus:<VW:V_widen>
+        (zero_extend:<VW:V_widen>
+         (vec_select:VW
+          (match_operand:VQI 1 "s_register_operand" "%w")
+          (match_operand:VQI 2 "vect_par_constant_low" "")))
+        (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
+    "vaddw.<V_u_elem>\t%q0, %q3, %e1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
+(define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
+  [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
+       (plus:<VW:V_widen>
+        (zero_extend:<VW:V_widen>
+         (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
+                        (match_operand:VQI 2 "vect_par_constant_high" "")))
+        (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
+  "TARGET_NEON"
+{
+ return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
+    "vaddw.<V_u_elem>\t%q0, %q3, %f1";
+}
+  [(set_attr "type" "neon_add_widen")])
+
  (define_insn "widen_usum<mode>3"
    [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
         (plus:<V_widen> (zero_extend:<V_widen>
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md

index b1cd556211a200a73487c5947fcab08886ae749e..ad92f6c5419b351b076af13cef234a9d002952e7 100644 (file)
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -612,59 +612,13 @@
  (define_special_predicate "vect_par_constant_high" 
    (match_code "parallel")
  {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  int i;
-  int base = GET_MODE_NUNITS (mode);
-
-  if ((count < 1)
-      || (count != base/2))
-    return false;
-    
-  if (!VECTOR_MODE_P (mode))
-    return false;
-
-  for (i = 0; i < count; i++)
-   {
-     rtx elt = XVECEXP (op, 0, i);
-     int val;
-
-     if (!CONST_INT_P (elt))
-       return false;
-
-     val = INTVAL (elt);
-     if (val != (base/2) + i)
-       return false;
-   }
-  return true; 
+  return arm_simd_check_vect_par_cnst_half_p (op, mode, true);
  })
  
  (define_special_predicate "vect_par_constant_low"
    (match_code "parallel")
  {
-  HOST_WIDE_INT count = XVECLEN (op, 0);
-  int i;
-  int base = GET_MODE_NUNITS (mode);
-
-  if ((count < 1)
-      || (count != base/2))
-    return false;
-    
-  if (!VECTOR_MODE_P (mode))
-    return false;
-
-  for (i = 0; i < count; i++)
-   {
-     rtx elt = XVECEXP (op, 0, i);
-     int val;
-
-     if (!CONST_INT_P (elt))
-       return false;
-
-     val = INTVAL (elt);
-     if (val != i)
-       return false;
-   } 
-  return true; 
+  return arm_simd_check_vect_par_cnst_half_p (op, mode, false);
  })
  
  (define_predicate "const_double_vcvt_power_of_two_reciprocal"
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index 82e814a36769f12c04903f0355a2a2e578d7d473..d49f1895160e173bb3590aaa9144d3ed98ecee00 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2016-04-25  Michael Collison <michael.collison@arm.com>
+
+       * gcc.target/arm/neon-vaddws16.c: New test.
+       * gcc.target/arm/neon-vaddws32.c: New test.
+       * gcc.target/arm/neon-vaddwu16.c: New test.
+       * gcc.target/arm/neon-vaddwu32.c: New test.
+       * gcc.target/arm/neon-vaddwu8.c: New test.
+       * lib/target-supports.exp
+       (check_effective_target_vect_widen_sum_hi_to_si_pattern): Indicate
+       that ARM NEON supports vector widening sum of HImode to SImode.
+
  2016-04-23  Jakub Jelinek  <jakub@redhat.com>
  
         PR sanitizer/70712
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws16.c b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c

new file mode 100644 (file)

index 0000000..8281134
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws16.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+
+
+int
+t6 (int len, void * dummy, short * __restrict x)
+{
+  len = len & ~31;
+  int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddws32.c b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c

new file mode 100644 (file)

index 0000000..8c18691
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddws32.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+
+int
+t6 (int len, void * dummy, int * __restrict x)
+{
+  len = len & ~31;
+  long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.s32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c

new file mode 100644 (file)

index 0000000..580bb06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu16.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+
+int
+t6 (int len, void * dummy, unsigned short * __restrict x)
+{
+  len = len & ~31;
+  unsigned int result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw.u16" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c

new file mode 100644 (file)

index 0000000..21b0633
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu32.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+
+int
+t6 (int len, void * dummy, unsigned int * __restrict x)
+{
+  len = len & ~31;
+  unsigned long long result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u32" } } */
diff --git a/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c

new file mode 100644 (file)

index 0000000..d350ed5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vaddwu8.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neon_ok } */
+/* { dg-options "-O3" } */
+/* { dg-add-options arm_neon } */
+
+
+
+int
+t6 (int len, void * dummy, char * __restrict x)
+{
+  len = len & ~31;
+  unsigned short result = 0;
+  __asm volatile ("");
+  for (int i = 0; i < len; i++)
+    result += x[i];
+  return result;
+}
+
+/* { dg-final { scan-assembler "vaddw\.u8" } } */
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index 3d44e1721dd69145696e9c275fa93ee1757a95a0..422bbab16cb6fcdbe77ed6661d34a8ba17b5b41b 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -4348,6 +4348,8 @@ proc check_effective_target_vect_widen_sum_hi_to_si_pattern { } {
          set et_vect_widen_sum_hi_to_si_pattern_saved 0
          if { [istarget powerpc*-*-*]
               || [istarget aarch64*-*-*]
+            || ([istarget arm*-*-*] &&
+                [check_effective_target_arm_neon_ok])
               || [istarget ia64-*-*] } {
              set et_vect_widen_sum_hi_to_si_pattern_saved 1
          }
author	collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)
committer	collison <collison@138bc75d-0d04-0410-961f-82ee72b054a4>
	Mon, 25 Apr 2016 05:03:09 +0000 (05:03 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/arm/arm-protos.h		patch \| blob \| blame \| history
gcc/config/arm/arm.c		patch \| blob \| blame \| history
gcc/config/arm/neon.md		patch \| blob \| blame \| history
gcc/config/arm/predicates.md		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/arm/neon-vaddws16.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vaddws32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vaddwu16.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vaddwu32.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vaddwu8.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/lib/target-supports.exp		patch \| blob \| blame \| history