[AARCH64] Add support for vector and scalar floating-point immediate loads.

author James Greenhalgh <james.greenhalgh@arm.com>

Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)

committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>

Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)
author James Greenhalgh <james.greenhalgh@arm.com>
Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)
committer James Greenhalgh <jgreenhalgh@gcc.gnu.org>
Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 0c5d0976b9768cde9c83c2bd3a722751ef0c3c8b..7933c600f05b14f2b15d6b43e2c2044c585b91a0 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,30 @@
+2013-01-07  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * config/aarch64/aarch64-protos.h
+       (aarch64_const_double_zero_rtx_p): Rename to...
+       (aarch64_float_const_zero_rtx_p): ...this.
+       (aarch64_float_const_representable_p): New.
+       (aarch64_output_simd_mov_immediate): Likewise.
+       * config/aarch64/aarch64-simd.md (*aarch64_simd_mov<mode>): Refactor
+       move immediate case.
+       * config/aarch64/aarch64.c
+       (aarch64_const_double_zero_rtx_p): Rename to...
+       (aarch64_float_const_zero_rtx_p): ...this.
+       (aarch64_print_operand): Allow printing of new constants.
+       (aarch64_valid_floating_const): New.
+       (aarch64_legitimate_constant_p): Check for valid floating-point
+       constants.
+       (aarch64_simd_valid_immediate): Likewise.
+       (aarch64_vect_float_const_representable_p): New.
+       (aarch64_float_const_representable_p): Likewise.
+       (aarch64_simd_imm_zero_p): Also allow for floating-point 0.0.
+       (aarch64_output_simd_mov_immediate): New.
+       * config/aarch64/aarch64.md (*movsf_aarch64): Add new alternative.
+       (*movdf_aarch64): Likewise.
+       * config/aarch64/constraints.md (Ufc): New.
+       (Y): Call aarch64_float_const_zero_rtx_p.
+       * config/aarch64/predicates.md (aarch64_fp_compare_operand): Likewise.
+
  2013-01-07  Richard Biener  <rguenther@suse.de>
  
         PR tree-optimization/55888
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index bcd3bb1b8b7f5f939554c9cf80dd4106b36e7ce2..ba96cd66618acb1404dd1af04e4250b64a99a67b 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -136,8 +136,8 @@ struct tune_params
  
  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
  bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
-bool aarch64_const_double_zero_rtx_p (rtx);
  bool aarch64_constant_address_p (rtx);
+bool aarch64_float_const_zero_rtx_p (rtx);
  bool aarch64_function_arg_regno_p (unsigned);
  bool aarch64_gen_movmemqi (rtx *);
  bool aarch64_is_extend_from_extract (enum machine_mode, rtx, rtx);
@@ -215,6 +215,9 @@ void aarch64_split_128bit_move (rtx, rtx);
  
  bool aarch64_split_128bit_move_p (rtx, rtx);
  
+/* Check for a legitimate floating point constant for FMOV.  */
+bool aarch64_float_const_representable_p (rtx);
+
  #if defined (RTX_CODE)
  
  bool aarch64_legitimate_address_p (enum machine_mode, rtx, RTX_CODE, bool);
@@ -246,4 +249,5 @@ extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
  extern bool
  aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
  
+char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
  #endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index febf71d37c464558e6b2cb1a3615314c2b8d71e8..d4b52c385a71019e07191f729f30d9bdbe339aa6 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -394,34 +394,8 @@
       case 4: return "ins\t%0.d[0], %1";
       case 5: return "mov\t%0, %1";
       case 6:
-       {
-       int is_valid;
-       unsigned char widthc;
-       int width;
-       static char templ[40];
-       int shift = 0, mvn = 0;
-       const char *mnemonic;
-       int length = 0;
-
-       is_valid =
-         aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
-                                                &operands[1], &width, &widthc,
-                                                &mvn, &shift);
-       gcc_assert (is_valid != 0);
-
-       mnemonic = mvn ? "mvni" : "movi";
-       if (widthc != 'd')
-         length += snprintf (templ, sizeof (templ),
-                             "%s\t%%0.%d%c, %%1",
-                             mnemonic, 64 / width, widthc);
-       else
-         length += snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
-
-       if (shift != 0)
-         length += snprintf (templ + length, sizeof (templ) - length,
-                             ", lsl %d", shift);
-       return templ;
-       }
+       return aarch64_output_simd_mov_immediate (&operands[1],
+                                                 <MODE>mode, 64);
       default: gcc_unreachable ();
       }
  }
@@ -438,39 +412,19 @@
     && (register_operand (operands[0], <MODE>mode)
         || register_operand (operands[1], <MODE>mode))"
  {
-   switch (which_alternative)
-     {
-     case 0: return "ld1\t{%0.<Vtype>}, %1";
-     case 1: return "st1\t{%1.<Vtype>}, %0";
-     case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
-     case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
-     case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
-     case 5: return "#";
-     case 6:
-       {
-       int is_valid;
-       unsigned char widthc;
-       int width;
-       static char templ[40];
-       int shift = 0, mvn = 0;
-
-       is_valid =
-         aarch64_simd_immediate_valid_for_move (operands[1], <MODE>mode,
-                                                &operands[1], &width, &widthc,
-                                                &mvn, &shift);
-       gcc_assert (is_valid != 0);
-       if (shift)
-         snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
-                   mvn ? "mvni" : "movi",
-                   128 / width, widthc, shift);
-       else
-         snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
-                   mvn ? "mvni" : "movi",
-                   128 / width, widthc);
-       return templ;
-       }
-     default: gcc_unreachable ();
-     }
+  switch (which_alternative)
+    {
+    case 0: return "ld1\t{%0.<Vtype>}, %1";
+    case 1: return "st1\t{%1.<Vtype>}, %0";
+    case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
+    case 3: return "umov\t%0, %1.d[0]\;umov\t%H0, %1.d[1]";
+    case 4: return "ins\t%0.d[0], %1\;ins\t%0.d[1], %H1";
+    case 5: return "#";
+    case 6:
+       return aarch64_output_simd_mov_immediate (&operands[1],
+                                                 <MODE>mode, 128);
+    default: gcc_unreachable ();
+    }
  }
    [(set_attr "simd_type" "simd_load1,simd_store1,simd_move,simd_movgp,simd_insgp,simd_move,simd_move_imm")
     (set_attr "simd_mode" "<MODE>")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 6bba8cc8a6dd03a950b5f2216e12e8f7010a3c79..ba8287ff026a1e2d90ba1be6a22f185ad985405a 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3028,7 +3028,7 @@ aarch64_legitimate_address_p (enum machine_mode mode, rtx x,
  
  /* Return TRUE if rtx X is immediate constant 0.0 */
  bool
-aarch64_const_double_zero_rtx_p (rtx x)
+aarch64_float_const_zero_rtx_p (rtx x)
  {
    REAL_VALUE_TYPE r;
  
@@ -3369,7 +3369,8 @@ aarch64_print_operand (FILE *f, rtx x, char code)
      case 'x':
        /* Print a general register name or the zero register (32-bit or
           64-bit).  */
-      if (x == const0_rtx)
+      if (x == const0_rtx
+         || (CONST_DOUBLE_P (x) && aarch64_float_const_zero_rtx_p (x)))
         {
           asm_fprintf (f, "%s%czr", REGISTER_PREFIX, code);
           break;
@@ -3420,11 +3421,46 @@ aarch64_print_operand (FILE *f, rtx x, char code)
           break;
  
         case CONST_VECTOR:
-         gcc_assert (aarch64_const_vec_all_same_int_p (x, HOST_WIDE_INT_MIN,
-                                                       HOST_WIDE_INT_MAX));
-         asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
+         if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_INT)
+           {
+             gcc_assert (aarch64_const_vec_all_same_int_p (x,
+                                                           HOST_WIDE_INT_MIN,
+                                                           HOST_WIDE_INT_MAX));
+             asm_fprintf (f, "%wd", INTVAL (CONST_VECTOR_ELT (x, 0)));
+           }
+         else if (aarch64_simd_imm_zero_p (x, GET_MODE (x)))
+           {
+             fputc ('0', f);
+           }
+         else
+           gcc_unreachable ();
           break;
  
+       case CONST_DOUBLE:
+         /* CONST_DOUBLE can represent a double-width integer.
+            In this case, the mode of x is VOIDmode.  */
+         if (GET_MODE (x) == VOIDmode)
+           ; /* Do Nothing.  */
+         else if (aarch64_float_const_zero_rtx_p (x))
+           {
+             fputc ('0', f);
+             break;
+           }
+         else if (aarch64_float_const_representable_p (x))
+           {
+#define buf_size 20
+             char float_buf[buf_size] = {'\0'};
+             REAL_VALUE_TYPE r;
+             REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+             real_to_decimal_for_mode (float_buf, &r,
+                                       buf_size, buf_size,
+                                       1, GET_MODE (x));
+             asm_fprintf (asm_out_file, "%s", float_buf);
+             break;
+#undef buf_size
+           }
+         output_operand_lossage ("invalid constant");
+         return;
         default:
           output_operand_lossage ("invalid operand");
           return;
@@ -5006,6 +5042,27 @@ aarch64_legitimate_pic_operand_p (rtx x)
    return true;
  }
  
+/* Return true if MODE is SFmode or DFmode and X is a CONST_DOUBLE holding
+   either 0.0 or a quarter-precision (FMOV immediate) constant.  */
+static bool
+aarch64_valid_floating_const (enum machine_mode mode, rtx x)
+{
+  if (!CONST_DOUBLE_P (x))
+    return false;
+
+  /* TODO: We could handle moving 0.0 to a TFmode register,
+     but first we would like to refactor movtf_aarch64 to be
+     more amenable to splitting moves properly and to gate
+     correctly on TARGET_SIMD.  For now, reject all constants
+     which are not for SFmode or DFmode registers.  */
+  if (!(mode == SFmode || mode == DFmode))
+    return false;
+
+  if (aarch64_float_const_zero_rtx_p (x))
+    return true;
+  return aarch64_float_const_representable_p (x);
+}
+
  static bool
  aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
  {
@@ -5019,8 +5076,8 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
    if ((GET_CODE (x) == CONST_VECTOR
         && aarch64_simd_valid_immediate (x, mode, false,
                                         NULL, NULL, NULL, NULL, NULL) != -1)
-      || CONST_INT_P (x))
-    return !targetm.cannot_force_const_mem (mode, x);
+      || CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
+       return !targetm.cannot_force_const_mem (mode, x);
  
    if (GET_CODE (x) == HIGH
        && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
@@ -5975,6 +6032,44 @@ sizetochar (int size)
      }
  }
  
+/* Return true iff X is a uniform vector of floating-point
+   constants, and the constant can be represented in
+   quarter-precision form.  Note, as aarch64_float_const_representable_p
+   rejects both +0.0 and -0.0, we will also reject +0.0 and -0.0.  */
+static bool
+aarch64_vect_float_const_representable_p (rtx x)
+{
+  int i = 0;
+  REAL_VALUE_TYPE r0, ri;
+  rtx x0, xi;
+
+  if (GET_MODE_CLASS (GET_MODE (x)) != MODE_VECTOR_FLOAT)
+    return false;
+
+  x0 = CONST_VECTOR_ELT (x, 0);  /* Reference element.  */
+  if (!CONST_DOUBLE_P (x0))
+    return false;
+
+  REAL_VALUE_FROM_CONST_DOUBLE (r0, x0);
+
+  for (i = 1; i < CONST_VECTOR_NUNITS (x); i++)
+    {
+      xi = CONST_VECTOR_ELT (x, i);
+      if (!CONST_DOUBLE_P (xi))
+       return false;
+
+      REAL_VALUE_FROM_CONST_DOUBLE (ri, xi);
+      if (!REAL_VALUES_EQUAL (r0, ri))
+       return false;  /* Not uniform.  */
+    }
+
+  return aarch64_float_const_representable_p (x0);
+}
+
+/* TODO: This function returns values similar to those
+   returned by neon_valid_immediate in gcc/config/arm/arm.c
+   but the API here is different enough that these magic numbers
+   are not used.  It should be sufficient to return true or false.  */
  static int
  aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
                               rtx *modconst, int *elementwidth,
@@ -6004,9 +6099,32 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
    unsigned int invmask = inverse ? 0xff : 0;
    int eshift, emvn;
  
-  /* TODO: Vectors of float constants.  */
    if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
-    return -1;
+    {
+      bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
+      int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
+
+      if (!(simd_imm_zero
+           || aarch64_vect_float_const_representable_p (op)))
+       return -1;
+
+       if (modconst)
+         *modconst = CONST_VECTOR_ELT (op, 0);
+
+       if (elementwidth)
+         *elementwidth = elem_width;
+
+       if (elementchar)
+         *elementchar = sizetochar (elem_width);
+
+       if (shift)
+         *shift = 0;
+
+       if (simd_imm_zero)
+         return 19;
+       else
+         return 18;
+    }
  
    /* Splat vector constant out into a byte vector.  */
    for (i = 0; i < n_elts; i++)
@@ -6161,8 +6279,8 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
  
  /* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
     (or, implicitly, MVNI) immediate.  Write back width per element
-   to *ELEMENTWIDTH (or zero for float elements), and a modified constant
-   (whatever should be output for a MOVI instruction) in *MODCONST.  */
+   to *ELEMENTWIDTH, and a modified constant (whatever should be output
+   for a MOVI instruction) in *MODCONST.  */
  int
  aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
                                        rtx *modconst, int *elementwidth,
@@ -6233,22 +6351,13 @@ aarch64_simd_shift_imm_p (rtx x, enum machine_mode mode, bool left)
      return aarch64_const_vec_all_same_int_p (x, 1, bit_width);
  }
  
+/* Return true if X is a uniform vector where all elements
+   are either the floating-point constant 0.0 or the
+   integer constant 0.  */
  bool
  aarch64_simd_imm_zero_p (rtx x, enum machine_mode mode)
  {
-  int nunits;
-  int i;
-
- if (GET_CODE (x) != CONST_VECTOR)
-   return false;
-
-  nunits = GET_MODE_NUNITS (mode);
-
-  for (i = 0; i < nunits; i++)
-    if (INTVAL (CONST_VECTOR_ELT (x, i)) != 0)
-      return false;
-
-  return true;
+  return x == CONST0_RTX (mode);
  }
  
  bool
@@ -6799,6 +6908,139 @@ aarch64_c_mode_for_suffix (char suffix)
    return VOIDmode;
  }
  
+/* We can only represent floating point constants which will fit in
+   "quarter-precision" values.  These values are characterised by
+   a sign bit, a 4-bit mantissa and a 3-bit exponent.  They are given
+   by:
+
+   (-1)^s * (n/16) * 2^r
+
+   Where:
+     's' is the sign bit.
+     'n' is an integer in the range 16 <= n <= 31.
+     'r' is an integer in the range -3 <= r <= 4.  */
+
+/* Return true iff X is a CONST_DOUBLE whose value fits a quarter-precision
+   floating-point immediate operand.  Note, we cannot represent 0.0.  */
+bool
+aarch64_float_const_representable_p (rtx x)
+{
+  /* This represents our current view of how many bits
+     make up the mantissa.  */
+  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+  int sign, exponent;
+  unsigned HOST_WIDE_INT mantissa, mask;
+  HOST_WIDE_INT m1, m2;
+  REAL_VALUE_TYPE r, m;
+
+  if (!CONST_DOUBLE_P (x))
+    return false;
+
+  REAL_VALUE_FROM_CONST_DOUBLE (r, x);
+
+  /* We cannot represent infinities, NaNs or +/-zero.  We won't
+     know if we have +zero until we analyse the mantissa, but we
+     can reject the other invalid values.  */
+  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
+      || REAL_VALUE_MINUS_ZERO (r))
+    return false;
+
+  /* Extract sign and exponent.  */
+  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;  /* NOTE(review): never read below.  */
+  r = real_value_abs (&r);
+  exponent = REAL_EXP (&r);
+
+  /* For the mantissa, we expand into two HOST_WIDE_INTs, apart from the
+     highest (sign) bit, with a fixed binary point at bit point_pos.
+     m1 holds the low part of the mantissa, m2 the high part.
+     WARNING: If we ever have a representation using more than 2 * H_W_I - 1
+     bits for the mantissa, this can fail (low bits will be lost).  */
+  real_ldexp (&m, &r, point_pos - exponent);
+  REAL_VALUE_TO_INT (&m1, &m2, m);
+
+  /* If the low part of the mantissa has bits set we cannot represent
+     the value.  */
+  if (m1 != 0)
+    return false;
+  /* We have rejected the lower HOST_WIDE_INT, so update our
+     understanding of how many bits lie in the mantissa and
+     look only at the high HOST_WIDE_INT.  */
+  mantissa = m2;
+  point_pos -= HOST_BITS_PER_WIDE_INT;
+
+  /* We can only represent values with a mantissa of the form 1.xxxx.  */
+  mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+  if ((mantissa & mask) != 0)
+    return false;
+
+  /* Having filtered unrepresentable values, we may now remove all
+     but the highest 5 bits.  */
+  mantissa >>= point_pos - 5;
+
+  /* We cannot represent the value 0.0, so reject it.  This is handled
+     elsewhere.  */
+  if (mantissa == 0)
+    return false;
+
+  /* Then, as bit 4 is always set, we can mask it off, leaving
+     the mantissa in the range [0, 15].  */
+  mantissa &= ~(1 << 4);
+  gcc_assert (mantissa <= 15);
+
+  /* GCC internally does not use IEEE754-like encoding (where normalized
+     significands are in the range [1, 2)).  GCC uses [0.5, 1) (see real.c).
+     Our mantissa values are shifted 4 places to the left relative to
+     normalized IEEE754 so we must modify the exponent returned by REAL_EXP
+     by 5 places to correct for GCC's representation.  */
+  exponent = 5 - exponent;
+
+  return (exponent >= 0 && exponent <= 7);
+}
+
+char*
+aarch64_output_simd_mov_immediate (rtx *const_vector,
+                                  enum machine_mode mode,
+                                  unsigned width)
+{
+  int is_valid;
+  unsigned char widthc;
+  int lane_width_bits;
+  static char templ[40];  /* Result buffer; overwritten by each call.  */
+  int shift = 0, mvn = 0;
+  const char *mnemonic;
+  unsigned int lane_count = 0;
+
+  is_valid =  /* CONST_VECTOR is also passed as the MODCONST output.  */
+    aarch64_simd_immediate_valid_for_move (*const_vector, mode,
+                                          const_vector, &lane_width_bits,
+                                          &widthc, &mvn, &shift);
+  gcc_assert (is_valid);
+
+  mode = GET_MODE_INNER (mode);  /* Now the element mode.  */
+  if (mode == SFmode || mode == DFmode)
+    {
+      bool zero_p =
+       aarch64_float_const_zero_rtx_p (*const_vector);
+      gcc_assert (shift == 0);
+      mnemonic = zero_p ? "movi" : "fmov";
+    }
+  else
+    mnemonic = mvn ? "mvni" : "movi";
+
+  gcc_assert (lane_width_bits != 0);
+  lane_count = width / lane_width_bits;  /* Lanes in the vector.  */
+
+  if (lane_count == 1)  /* One lane: operand 0 is printed via %d0.  */
+    snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
+  else if (shift)
+    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
+             mnemonic, lane_count, widthc, shift);
+  else
+    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
+             mnemonic, lane_count, widthc);
+  return templ;  /* Static storage: copy before calling again.  */
+}
+
  /* Split operands into moves from op[1] + op[2] into op[0].  */
  
  void
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md

index ec65b3c2a11e65e8caffd4225961e1666153d8f6..70df659b89e621247837a7c19ebd11bd9906011d 100644 (file)
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -915,38 +915,44 @@
  )
  
  (define_insn "*movsf_aarch64"
-  [(set (match_operand:SF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r")
-       (match_operand:SF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  [(set (match_operand:SF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
+       (match_operand:SF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
    "TARGET_FLOAT && (register_operand (operands[0], SFmode)
      || register_operand (operands[1], SFmode))"
    "@
     fmov\\t%s0, %w1
     fmov\\t%w0, %s1
     fmov\\t%s0, %s1
+   fmov\\t%s0, %1
     ldr\\t%s0, %1
     str\\t%s1, %0
     ldr\\t%w0, %1
     str\\t%w1, %0
     mov\\t%w0, %w1"
-  [(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,fmov")
+  [(set_attr "v8type" "fmovi2f,fmovf2i,\
+                      fmov,fconst,fpsimd_load,\
+                      fpsimd_store,fpsimd_load,fpsimd_store,fmov")
     (set_attr "mode" "SF")]
  )
  
  (define_insn "*movdf_aarch64"
-  [(set (match_operand:DF 0 "nonimmediate_operand" "= w,?r,w,w,m,r,m ,r")
-       (match_operand:DF 1 "general_operand"      "?rY, w,w,m,w,m,rY,r"))]
+  [(set (match_operand:DF 0 "nonimmediate_operand" "=w, ?r,w,w  ,w,m,r,m ,r")
+       (match_operand:DF 1 "general_operand"      "?rY, w,w,Ufc,m,w,m,rY,r"))]
    "TARGET_FLOAT && (register_operand (operands[0], DFmode)
      || register_operand (operands[1], DFmode))"
    "@
     fmov\\t%d0, %x1
     fmov\\t%x0, %d1
     fmov\\t%d0, %d1
+   fmov\\t%d0, %1
     ldr\\t%d0, %1
     str\\t%d1, %0
     ldr\\t%x0, %1
     str\\t%x1, %0
     mov\\t%x0, %x1"
-  [(set_attr "v8type" "fmovi2f,fmovf2i,fmov,fpsimd_load,fpsimd_store,fpsimd_load,fpsimd_store,move")
+  [(set_attr "v8type" "fmovi2f,fmovf2i,\
+                      fmov,fconst,fpsimd_load,\
+                      fpsimd_store,fpsimd_load,fpsimd_store,move")
     (set_attr "mode" "DF")]
  )
  
@@ -991,7 +997,6 @@
     (set_attr "simd" "yes,*,*,*,yes,*,*,*,*,*")]
  )
  
-
  ;; Operands 1 and 3 are tied together by the final condition; so we allow
  ;; fairly lax checking on the second memory operation.
  (define_insn "load_pair<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md

index dae4b94b71a32487a059ed5616d4b94e11b405d2..7e33ec08862c31127b803425eb66539de72b40e5 100644 (file)
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -69,7 +69,7 @@
  (define_constraint "Y"
    "Floating point constant zero."
    (and (match_code "const_double")
-       (match_test "aarch64_const_double_zero_rtx_p (op)")))
+       (match_test "aarch64_float_const_zero_rtx_p (op)")))
  
  (define_constraint "Z"
    "Integer constant zero."
@@ -138,6 +138,12 @@
    (and (match_code "mem")
         (match_test "aarch64_simd_mem_operand_p (op)")))
  
+(define_constraint "Ufc"
+  "A floating point constant which can be used with an\
+   FMOV immediate operation."
+  (and (match_code "const_double")
+       (match_test "aarch64_float_const_representable_p (op)")))
+
  (define_constraint "Dn"
    "@internal
   A constraint that matches vector of immediates."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md

index 80f6ce2cb37141513b564aaa55aa63f2bc5d5c1b..1dbaec10e7d7b7a9d721ae27af4c7d3c7d7bdb25 100644 (file)
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -41,7 +41,7 @@
  (define_predicate "aarch64_fp_compare_operand"
    (ior (match_operand 0 "register_operand")
         (and (match_code "const_double")
-           (match_test "aarch64_const_double_zero_rtx_p (op)"))))
+           (match_test "aarch64_float_const_zero_rtx_p (op)"))))
  
  (define_predicate "aarch64_plus_immediate"
    (and (match_code "const_int")
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index f8a4563837d0cee17639e7de501136dc9dd81cd4..178459ea04e1b1ec690054711efeba2020107815 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,14 @@
+2013-01-07  James Greenhalgh  <james.greenhalgh@arm.com>
+
+       * gcc.target/aarch64/fmovd.c: New.
+       * gcc.target/aarch64/fmovf.c: Likewise.
+       * gcc.target/aarch64/fmovd-zero.c: Likewise.
+       * gcc.target/aarch64/fmovf-zero.c: Likewise.
+       * gcc.target/aarch64/vect-fmovd.c: Likewise.
+       * gcc.target/aarch64/vect-fmovf.c: Likewise.
+       * gcc.target/aarch64/vect-fmovd-zero.c: Likewise.
+       * gcc.target/aarch64/vect-fmovf-zero.c: Likewise.
+
  2013-01-07  Richard Biener  <rguenther@suse.de>
  
         PR tree-optimization/55888
diff --git a/gcc/testsuite/gcc.target/aarch64/fmovd-zero.c b/gcc/testsuite/gcc.target/aarch64/fmovd-zero.c

new file mode 100644 (file)

index 0000000..7e4590a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmovd-zero.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+foo (double *output)
+{
+  *output = 0.0;
+}
+
+/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, xzr" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fmovd.c b/gcc/testsuite/gcc.target/aarch64/fmovd.c

new file mode 100644 (file)

index 0000000..c50e74e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmovd.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+foo (double *output)
+{
+  *output = 4.25;
+}
+
+/* { dg-final { scan-assembler "fmov\\td\[0-9\]+, 4\\.25" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fmovf-zero.c b/gcc/testsuite/gcc.target/aarch64/fmovf-zero.c

new file mode 100644 (file)

index 0000000..5050ac3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmovf-zero.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+foo (float *output)
+{
+  *output = 0.0;
+}
+
+/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, wzr" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/fmovf.c b/gcc/testsuite/gcc.target/aarch64/fmovf.c

new file mode 100644 (file)

index 0000000..0a9e215
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmovf.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+void
+foo (float *output)
+{
+  *output = 4.25;
+}
+
+/* { dg-final { scan-assembler "fmov\\ts\[0-9\]+, 4\\.25" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c

new file mode 100644 (file)

index 0000000..667d227
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
+
+#define N 32
+
+void
+foo (double *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = 0.0;
+}
+
+/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.2d, 0" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c

new file mode 100644 (file)

index 0000000..a0211c7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovd.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
+
+#define N 32
+
+void
+foo (double *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = 4.25;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.2d, 4\\.25" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c

new file mode 100644 (file)

index 0000000..259a9d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
+
+#define N 32
+
+void
+foo (float *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = 0.0;
+}
+
+/* { dg-final { scan-assembler "movi\\tv\[0-9\]+\\.\[24\]s, 0" } } */
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c b/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c

new file mode 100644 (file)

index 0000000..0bd21dc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vect-fmovf.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all" } */
+
+#define N 32
+
+void
+foo (float *output)
+{
+  int i = 0;
+  /* Vectorizable.  */
+  for (i = 0; i < N; i++)
+    output[i] = 4.25;
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { scan-assembler "fmov\\tv\[0-9\]+\\.\[24\]s, 4\\.25" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
author	James Greenhalgh <james.greenhalgh@arm.com>
	Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)
committer	James Greenhalgh <jgreenhalgh@gcc.gnu.org>
	Mon, 7 Jan 2013 15:22:06 +0000 (15:22 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.md		patch \| blob \| blame \| history
gcc/config/aarch64/constraints.md		patch \| blob \| blame \| history
gcc/config/aarch64/predicates.md		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/fmovd-zero.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmovd.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmovf-zero.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmovf.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vect-fmovd-zero.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vect-fmovd.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vect-fmovf-zero.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/vect-fmovf.c	[new file with mode: 0644]	patch \| blob