return result;
}
-#define vext_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x2_t b_ = (b); \
- float32x2_t a_ = (a); \
- float32x2_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x1_t b_ = (b); \
- float64x1_t a_ = (a); \
- float64x1_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x8_t b_ = (b); \
- poly8x8_t a_ = (a); \
- poly8x8_t result; \
- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x4_t b_ = (b); \
- poly16x4_t a_ = (a); \
- poly16x4_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_s8(a, b, c) \
- __extension__ \
- ({ \
- int8x8_t b_ = (b); \
- int8x8_t a_ = (a); \
- int8x8_t result; \
- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x4_t b_ = (b); \
- int16x4_t a_ = (a); \
- int16x4_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x2_t b_ = (b); \
- int32x2_t a_ = (a); \
- int32x2_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_s64(a, b, c) \
- __extension__ \
- ({ \
- int64x1_t b_ = (b); \
- int64x1_t a_ = (a); \
- int64x1_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_u8(a, b, c) \
- __extension__ \
- ({ \
- uint8x8_t b_ = (b); \
- uint8x8_t a_ = (a); \
- uint8x8_t result; \
- __asm__ ("ext %0.8b,%1.8b,%2.8b,%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x4_t b_ = (b); \
- uint16x4_t a_ = (a); \
- uint16x4_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x2_t b_ = (b); \
- uint32x2_t a_ = (a); \
- uint32x2_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vext_u64(a, b, c) \
- __extension__ \
- ({ \
- uint64x1_t b_ = (b); \
- uint64x1_t a_ = (a); \
- uint64x1_t result; \
- __asm__ ("ext %0.8b, %1.8b, %2.8b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_f32(a, b, c) \
- __extension__ \
- ({ \
- float32x4_t b_ = (b); \
- float32x4_t a_ = (a); \
- float32x4_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_f64(a, b, c) \
- __extension__ \
- ({ \
- float64x2_t b_ = (b); \
- float64x2_t a_ = (a); \
- float64x2_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_p8(a, b, c) \
- __extension__ \
- ({ \
- poly8x16_t b_ = (b); \
- poly8x16_t a_ = (a); \
- poly8x16_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_p16(a, b, c) \
- __extension__ \
- ({ \
- poly16x8_t b_ = (b); \
- poly16x8_t a_ = (a); \
- poly16x8_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_s8(a, b, c) \
- __extension__ \
- ({ \
- int8x16_t b_ = (b); \
- int8x16_t a_ = (a); \
- int8x16_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_s16(a, b, c) \
- __extension__ \
- ({ \
- int16x8_t b_ = (b); \
- int16x8_t a_ = (a); \
- int16x8_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_s32(a, b, c) \
- __extension__ \
- ({ \
- int32x4_t b_ = (b); \
- int32x4_t a_ = (a); \
- int32x4_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_s64(a, b, c) \
- __extension__ \
- ({ \
- int64x2_t b_ = (b); \
- int64x2_t a_ = (a); \
- int64x2_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_u8(a, b, c) \
- __extension__ \
- ({ \
- uint8x16_t b_ = (b); \
- uint8x16_t a_ = (a); \
- uint8x16_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_u16(a, b, c) \
- __extension__ \
- ({ \
- uint16x8_t b_ = (b); \
- uint16x8_t a_ = (a); \
- uint16x8_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*2" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_u32(a, b, c) \
- __extension__ \
- ({ \
- uint32x4_t b_ = (b); \
- uint32x4_t a_ = (a); \
- uint32x4_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*4" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
-#define vextq_u64(a, b, c) \
- __extension__ \
- ({ \
- uint64x2_t b_ = (b); \
- uint64x2_t a_ = (a); \
- uint64x2_t result; \
- __asm__ ("ext %0.16b, %1.16b, %2.16b, #%3*8" \
- : "=w"(result) \
- : "w"(a_), "w"(b_), "i"(c) \
- : /* No clobbers */); \
- result; \
- })
-
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
vfma_f32 (float32x2_t a, float32x2_t b, float32x2_t c)
{

__extension__ static __inline uint64_t __attribute__ ((__always_inline__))
vgetq_lane_u64 (uint64x2_t __a, const int __b)
{
  return __aarch64_vgetq_lane_u64 (__a, __b);
}
+/* vext */
+
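+/* Each vext intrinsic concatenates __a and __b and extracts a vector of the
+   same width starting at lane __c of __a.  The lane-bounds builtin rejects
+   an out-of-range immediate __c at compile time.  On big-endian
+   (__AARCH64EB__) the compiler's vector lane order is the reverse of the
+   architectural one, so the shuffle operands are swapped and the indices
+   mirrored.  */
+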
+__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))
+vext_f32 (float32x2_t __a, float32x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline float64x1_t __attribute__ ((__always_inline__))
+vext_f64 (float64x1_t __a, float64x1_t __b, __const int __c)
+{
+ /* The only possible index to the assembler instruction returns element 0. */
+ __builtin_aarch64_im_lane_boundsi (__c, 1);
+ return __a;
+}
+
+__extension__ static __inline poly8x8_t __attribute__ ((__always_inline__))
+vext_p8 (poly8x8_t __a, poly8x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline poly16x4_t __attribute__ ((__always_inline__))
+vext_p16 (poly16x4_t __a, poly16x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int8x8_t __attribute__ ((__always_inline__))
+vext_s8 (int8x8_t __a, int8x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int16x4_t __attribute__ ((__always_inline__))
+vext_s16 (int16x4_t __a, int16x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int32x2_t __attribute__ ((__always_inline__))
+vext_s32 (int32x2_t __a, int32x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
+vext_s64 (int64x1_t __a, int64x1_t __b, __const int __c)
+{
+ /* The only possible index to the assembler instruction returns element 0. */
+ __builtin_aarch64_im_lane_boundsi (__c, 1);
+ return __a;
+}
+
+__extension__ static __inline uint8x8_t __attribute__ ((__always_inline__))
+vext_u8 (uint8x8_t __a, uint8x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint8x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline uint16x4_t __attribute__ ((__always_inline__))
+vext_u16 (uint16x4_t __a, uint16x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint16x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint16x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline uint32x2_t __attribute__ ((__always_inline__))
+vext_u32 (uint32x2_t __a, uint32x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint32x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
+vext_u64 (uint64x1_t __a, uint64x1_t __b, __const int __c)
+{
+ /* The only possible index to the assembler instruction returns element 0. */
+ __builtin_aarch64_im_lane_boundsi (__c, 1);
+ return __a;
+}
+
+__extension__ static __inline float32x4_t __attribute__ ((__always_inline__))
+vextq_f32 (float32x4_t __a, float32x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline float64x2_t __attribute__ ((__always_inline__))
+vextq_f64 (float64x2_t __a, float64x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline poly8x16_t __attribute__ ((__always_inline__))
+vextq_p8 (poly8x16_t __a, poly8x16_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x16_t)
+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline poly16x8_t __attribute__ ((__always_inline__))
+vextq_p16 (poly16x8_t __a, poly16x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint16x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int8x16_t __attribute__ ((__always_inline__))
+vextq_s8 (int8x16_t __a, int8x16_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x16_t)
+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline int16x8_t __attribute__ ((__always_inline__))
+vextq_s16 (int16x8_t __a, int16x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint16x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline int32x4_t __attribute__ ((__always_inline__))
+vextq_s32 (int32x4_t __a, int32x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline int64x2_t __attribute__ ((__always_inline__))
+vextq_s64 (int64x2_t __a, int64x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
+__extension__ static __inline uint8x16_t __attribute__ ((__always_inline__))
+vextq_u8 (uint8x16_t __a, uint8x16_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 16);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint8x16_t)
+ {16-__c, 17-__c, 18-__c, 19-__c, 20-__c, 21-__c, 22-__c, 23-__c,
+ 24-__c, 25-__c, 26-__c, 27-__c, 28-__c, 29-__c, 30-__c, 31-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint8x16_t)
+ {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7,
+ __c+8, __c+9, __c+10, __c+11, __c+12, __c+13, __c+14, __c+15});
+#endif
+}
+
+__extension__ static __inline uint16x8_t __attribute__ ((__always_inline__))
+vextq_u16 (uint16x8_t __a, uint16x8_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 8);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint16x8_t)
+ {8-__c, 9-__c, 10-__c, 11-__c, 12-__c, 13-__c, 14-__c, 15-__c});
+#else
+ return __builtin_shuffle (__a, __b,
+ (uint16x8_t) {__c, __c+1, __c+2, __c+3, __c+4, __c+5, __c+6, __c+7});
+#endif
+}
+
+__extension__ static __inline uint32x4_t __attribute__ ((__always_inline__))
+vextq_u32 (uint32x4_t __a, uint32x4_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 4);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a,
+ (uint32x4_t) {4-__c, 5-__c, 6-__c, 7-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint32x4_t) {__c, __c+1, __c+2, __c+3});
+#endif
+}
+
+__extension__ static __inline uint64x2_t __attribute__ ((__always_inline__))
+vextq_u64 (uint64x2_t __a, uint64x2_t __b, __const int __c)
+{
+ __builtin_aarch64_im_lane_boundsi (__c, 2);
+#ifdef __AARCH64EB__
+ return __builtin_shuffle (__b, __a, (uint64x2_t) {2-__c, 3-__c});
+#else
+ return __builtin_shuffle (__a, __b, (uint64x2_t) {__c, __c+1});
+#endif
+}
+
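For reference (not part of the patch), a minimal usage sketch of the
semantics these intrinsics provide, assuming a little-endian AArch64 target
and a compiler shipping this header:

    #include <arm_neon.h>
    #include <stdio.h>

    int main (void)
    {
      uint8x8_t a = {0, 1, 2, 3, 4, 5, 6, 7};
      uint8x8_t b = {10, 11, 12, 13, 14, 15, 16, 17};
      /* Concatenate a:b and take eight lanes starting at lane 3 of a.  */
      uint8x8_t r = vext_u8 (a, b, 3);

      uint8_t out[8];
      vst1_u8 (out, r);
      for (int i = 0; i < 8; i++)
        printf ("%d ", out[i]);   /* Prints: 3 4 5 6 7 10 11 12  */
      printf ("\n");
      return 0;
    }
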
/* vfma_lane */
__extension__ static __inline float32x2_t __attribute__ ((__always_inline__))