(define_expand "mov<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"
/* Force the operand into a register if it is not an
immediate whose use can be replaced with xzr.
(define_expand "movmisalign<mode>"
[(set (match_operand:VALL_F16 0 "nonimmediate_operand")
(match_operand:VALL_F16 1 "general_operand"))]
- "TARGET_SIMD && !STRICT_ALIGNMENT"
+ "TARGET_FLOAT && !STRICT_ALIGNMENT"
{
/* This pattern is not permitted to fail during expansion: if both arguments
are non-registers (e.g. memory := constant, which can be created by the
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, m, m, w, ?r, ?w, ?r, w")
+ "=w, m, m, w, ?r, ?w, ?r, w, w")
(match_operand:VDMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
case 0: return "ldr\t%d0, %1";
case 1: return "str\txzr, %0";
case 2: return "str\t%d1, %0";
- case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
- case 4: return "umov\t%0, %1.d[0]";
+ case 3:
+ if (TARGET_SIMD)
+ return "mov\t%0.<Vbtype>, %1.<Vbtype>";
+ return "fmov\t%d0, %d1";
+ case 4:
+ if (TARGET_SIMD)
+ return "umov\t%0, %1.d[0]";
+ return "fmov\t%x0, %d1";
case 5: return "fmov\t%d0, %1";
case 6: return "mov\t%0, %1";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 64);
+ case 8: return "fmov\t%d0, xzr";
default: gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
neon_logic<q>, neon_to_gp<q>, f_mcr,\
- mov_reg, neon_move<q>")]
+ mov_reg, neon_move<q>, f_mcr")
+ (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
)
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
[(set (match_operand:VQMOV 0 "nonimmediate_operand"
- "=w, Umn, m, w, ?r, ?w, ?r, w")
+ "=w, Umn, m, w, ?r, ?w, ?r, w, w")
(match_operand:VQMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn"))]
- "TARGET_SIMD
+ "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
return "#";
case 7:
return aarch64_output_simd_mov_immediate (operands[1], 128);
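+ /* Writes to the D register implicitly zero the upper 64 bits,
+ so a single FMOV from XZR clears the whole vector. */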
+ case 8:
+ return "fmov\t%d0, xzr";
default:
gcc_unreachable ();
}
}
[(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
neon_logic<q>, multiple, multiple,\
- multiple, neon_move<q>")
- (set_attr "length" "4,4,4,4,8,8,8,4")]
+ multiple, neon_move<q>, fmov")
+ (set_attr "length" "4,4,4,4,8,8,8,4,4")
+ (set_attr "arch" "*,*,*,simd,*,*,*,simd,*")]
)
;; When storing lane zero we can use the normal STR and its more permissive
(match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:DREG2 2 "register_operand" "=w")
(match_operand:DREG2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
(match_operand:DREG 1 "register_operand" "w"))
(set (match_operand:DREG2 2 "memory_operand" "=m")
(match_operand:DREG2 3 "register_operand" "w"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
(match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
(set (match_operand:VQ2 2 "register_operand" "=w")
(match_operand:VQ2 3 "memory_operand" "m"))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
(match_operand:VQ 1 "register_operand" "w"))
(set (match_operand:VQ2 2 "memory_operand" "=m")
(match_operand:VQ2 3 "register_operand" "w"))]
- "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
- plus_constant (Pmode,
- XEXP (operands[0], 0),
- GET_MODE_SIZE (<VQ:MODE>mode)))"
+ "TARGET_FLOAT
+ && rtx_equal_p (XEXP (operands[2], 0),
+ plus_constant (Pmode,
+ XEXP (operands[0], 0),
+ GET_MODE_SIZE (<VQ:MODE>mode)))"
"stp\\t%q1, %q3, %z0"
[(set_attr "type" "neon_stp_q")]
)
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
- (match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ (match_operand:VQMOV 1 "register_operand" ""))]
+ "TARGET_FLOAT
+ && reload_completed
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
[(const_int 0)]
(define_split
[(set (match_operand:VQMOV 0 "register_operand" "")
(match_operand:VQMOV 1 "register_operand" ""))]
- "TARGET_SIMD && reload_completed
+ "TARGET_FLOAT
+ && reload_completed
&& ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
|| (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
[(const_int 0)]
(define_expand "@aarch64_split_simd_mov<mode>"
[(set (match_operand:VQMOV 0)
(match_operand:VQMOV 1))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx dst = operands[0];
rtx src = operands[1];
(vec_select:<VHALF>
(match_operand:VQMOV 1 "register_operand")
(match_operand 2 "ascending_int_parallel")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
+ {
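+ /* Selecting the low half of the lanes is just a lowpart move. */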
+ if (vect_par_cnst_lo_half (operands[2], <MODE>mode))
+ {
+ emit_move_insn (operands[0], gen_lowpart (<VHALF>mode, operands[1]));
+ DONE;
+ }
+ }
)
(define_expand "aarch64_get_low<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], lo));
(define_expand "aarch64_get_high<mode>"
[(match_operand:<VHALF> 0 "register_operand")
(match_operand:VQMOV 1 "register_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
emit_insn (gen_aarch64_get_half<mode> (operands[0], operands[1], hi));
)
(define_insn "aarch64_simd_mov_from_<mode>high"
- [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r")
+ [(set (match_operand:<VHALF> 0 "register_operand" "=w,?r,?r")
(vec_select:<VHALF>
- (match_operand:VQMOV_NO2E 1 "register_operand" "w,w")
+ (match_operand:VQMOV_NO2E 1 "register_operand" "w,w,w")
(match_operand:VQMOV_NO2E 2 "vect_par_cnst_hi_half" "")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
- dup\\t%d0, %1.d[1]
- umov\t%0, %1.d[1]"
- [(set_attr "type" "neon_dup<q>,neon_to_gp<q>")
+ dup\t%d0, %1.d[1]
+ umov\t%0, %1.d[1]
+ fmov\t%0, %1.d[1]"
+ [(set_attr "type" "neon_dup<q>,neon_to_gp<q>,f_mrc")
+ (set_attr "arch" "simd,simd,*")
(set_attr "length" "4")]
)
[(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
)
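+;; Extract the high 64-bit half of a 128-bit two-element vector into a
+;; general register using FMOV from the D[1] lane, which only needs
+;; TARGET_FLOAT, not TARGET_SIMD.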
+(define_insn "*aarch64_get_high<mode>"
+ [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r")
+ (vec_select:<VEL>
+ (match_operand:VQ_2E 1 "register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand")])))]
+ "TARGET_FLOAT && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 1"
+ "fmov\t%0, %1.d[1]"
+ [(set_attr "type" "f_mrc")]
+)
+
(define_insn "load_pair_lanes<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=w")
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "memory_operand" "Utq")
(match_operand:VDCSIF 2 "memory_operand" "m")))]
- "TARGET_SIMD
+ "TARGET_FLOAT
&& aarch64_mergeable_load_pair_p (<VDBL>mode, operands[1], operands[2])"
"ldr\\t%<single_dtype>0, %1"
[(set_attr "type" "neon_load1_1reg<dblq>")]
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "register_operand" "w, r")
(match_operand:VDCSIF 2 "register_operand" "w, r")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
"@
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
;; the register alternatives either don't accept or themselves disparage.
(define_insn "*aarch64_combine_internal<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, w, ?r")))]
+ "TARGET_FLOAT
&& !BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>1, %<single_type>2, %y0
stp\t%<single_wx>1, %<single_wx>2, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr,
+ neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
(define_insn "*aarch64_combine_internal_be<mode>"
- [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, Umn, Umn")
+ [(set (match_operand:<VDBL> 0 "aarch64_reg_or_mem_pair_operand" "=w, w, w, w, Umn, Umn")
(vec_concat:<VDBL>
- (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, Utv, ?w, ?r")
- (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, ?w, ?r")))]
- "TARGET_SIMD
+ (match_operand:VDCSIF 2 "aarch64_simd_nonimmediate_operand" "w, ?r, ?r, Utv, ?w, ?r")
+ (match_operand:VDCSIF 1 "register_operand" "0, 0, 0, 0, ?w, ?r")))]
+ "TARGET_FLOAT
&& BYTES_BIG_ENDIAN
&& (register_operand (operands[0], <VDBL>mode)
|| register_operand (operands[2], <MODE>mode))"
"@
ins\t%0.<single_type>[1], %2.<single_type>[0]
ins\t%0.<single_type>[1], %<single_wx>2
+ fmov\t%0.d[1], %2
ld1\t{%0.<single_type>}[1], %2
stp\t%<single_type>2, %<single_type>1, %y0
stp\t%<single_wx>2, %<single_wx>1, %y0"
- [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, neon_load1_one_lane<dblq>, neon_stp, store_16")]
+ [(set_attr "type" "neon_ins<dblq>, neon_from_gp<dblq>, f_mcr, neon_load1_one_lane<dblq>, neon_stp, store_16")
+ (set_attr "arch" "simd,simd,*,simd,*,*")]
)
;; In this insn, operand 1 should be low, and operand 2 the high part of the
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")))]
- "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && !BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
(define_insn "*aarch64_combinez_be<mode>"
(vec_concat:<VDBL>
(match_operand:VDCSIF 2 "aarch64_simd_or_scalar_imm_zero")
(match_operand:VDCSIF 1 "nonimmediate_operand" "w,?r,m")))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN"
+ "TARGET_FLOAT && BYTES_BIG_ENDIAN"
"@
fmov\\t%<single_type>0, %<single_type>1
fmov\t%<single_type>0, %<single_wx>1
ldr\\t%<single_type>0, %1"
- [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
- (set_attr "arch" "simd,fp,simd")]
+ [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")]
)
;; Form a vector whose first half (in array order) comes from operand 1
(vec_concat:<VDBL>
(match_operand:VDCSIF 1 "general_operand")
(match_operand:VDCSIF 2 "general_operand")))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
int lo = BYTES_BIG_ENDIAN ? 2 : 1;
int hi = BYTES_BIG_ENDIAN ? 1 : 2;
}
else
{
- /* Use *aarch64_combine_general<mode>. */
+ /* Use *aarch64_combine_internal<mode>. */
operands[lo] = force_reg (<MODE>mode, operands[lo]);
if (!aarch64_simd_nonimmediate_operand (operands[hi], <MODE>mode))
{
[(match_operand:<VDBL> 0 "register_operand")
(match_operand:VDC 1 "general_operand")
(match_operand:VDC 2 "general_operand")]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (BYTES_BIG_ENDIAN)
std::swap (operands[1], operands[2]);
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT_QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_QD 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
(define_expand "mov<mode>"
[(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
(match_operand:VSTRUCT 1 "general_operand"))]
- "TARGET_SIMD"
+ "TARGET_FLOAT"
{
if (can_create_pseudo_p ())
{
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2D 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand" "=w,m,w")
(match_operand:VSTRUCT_2Q 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_movoi"
[(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
(match_operand:OI 1 "general_operand" " w,w,m"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], OImode)
|| register_operand (operands[1], OImode))"
"@
stp\\t%q1, %R1, %0
ldp\\t%q0, %R0, %1"
[(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
+ (set_attr "arch" "simd,*,*")
(set_attr "length" "8,4,4")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_3QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_movci"
[(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:CI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], CImode)
|| register_operand (operands[1], CImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "12,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "12,8,8")]
)
(define_insn "*aarch64_be_mov<mode>"
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
(match_operand:VSTRUCT_4QD 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
"#"
[(set_attr "type" "multiple")
+ (set_attr "arch" "fp<q>,*,*")
(set_attr "length" "16,8,8")]
)
(define_insn "*aarch64_be_movxi"
[(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
(match_operand:XI 1 "general_operand" " w,w,o"))]
- "TARGET_SIMD && BYTES_BIG_ENDIAN
+ "TARGET_FLOAT
+ && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
&& (register_operand (operands[0], XImode)
|| register_operand (operands[1], XImode))"
"#"
[(set_attr "type" "multiple")
- (set_attr "length" "16,4,4")]
+ (set_attr "arch" "simd,*,*")
+ (set_attr "length" "16,8,8")]
)
(define_split
[(set (match_operand:VSTRUCT_2QD 0 "register_operand")
(match_operand:VSTRUCT_2QD 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 2);
(define_split
[(set (match_operand:OI 0 "register_operand")
(match_operand:OI 1 "register_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
(define_split
[(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_3QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
(define_split
[(set (match_operand:CI 0 "nonimmediate_operand")
(match_operand:CI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], CImode)
aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
simplify_gen_subreg (OImode, operands[1], CImode, 0));
(define_split
[(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand")
(match_operand:VSTRUCT_4QD 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], <MODE>mode)
aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
(define_split
[(set (match_operand:XI 0 "nonimmediate_operand")
(match_operand:XI 1 "general_operand"))]
- "TARGET_SIMD && reload_completed"
+ "TARGET_FLOAT && reload_completed"
[(const_int 0)]
{
if (register_operand (operands[0], XImode)
aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
DONE;
}
- else if (BYTES_BIG_ENDIAN)
+ else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
{
emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
simplify_gen_subreg (OImode, operands[1], XImode, 0));
case E_OImode:
case E_CImode:
case E_XImode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* Structures of 64-bit Advanced SIMD vectors. */
case E_V2x8QImode:
case E_V4x4HFmode:
case E_V4x2SFmode:
case E_V4x1DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL : 0;
/* Structures of 128-bit Advanced SIMD vectors. */
case E_V2x16QImode:
case E_V4x8HFmode:
case E_V4x4SFmode:
case E_V4x2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD | VEC_STRUCT : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD | VEC_STRUCT : 0;
/* 64-bit Advanced SIMD vectors. */
case E_V8QImode:
case E_V8BFmode:
case E_V4SFmode:
case E_V2DFmode:
- return TARGET_SIMD ? VEC_ADVSIMD : 0;
+ return TARGET_FLOAT ? VEC_ADVSIMD : 0;
default:
return 0;
}
/* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. */
- if ((vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD
+ && (vec_flags & VEC_ADVSIMD)
&& known_eq (nunits, 0U)
&& known_eq (GET_MODE_BITSIZE (vector_mode), 64U)
&& maybe_ge (GET_MODE_BITSIZE (element_mode)
if (GP_REGNUM_P (regno))
{
- if (vec_flags & VEC_ANY_SVE)
+ if (vec_flags & (VEC_ANY_SVE | VEC_STRUCT))
return false;
if (known_le (GET_MODE_SIZE (mode), 8))
return true;
|| mode == TImode
|| mode == TFmode
|| mode == TDmode
- || (BYTES_BIG_ENDIAN && advsimd_struct_p));
+ || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
+ && advsimd_struct_p));
/* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
corresponds to the actual size of the memory being loaded/stored and the
mode of the corresponding addressing mode is half of that. */
/* On LE, for AdvSIMD, don't support anything other than POST_INC or
REG addressing. */
if (advsimd_struct_p
+ && TARGET_SIMD
&& !BYTES_BIG_ENDIAN
&& (code != POST_INC && code != REG))
return false;
&& aarch64_offset_7bit_signed_scaled_p (DImode, offset + 48));
/* A 7bit offset check because OImode will emit a ldp/stp
- instruction (only big endian will get here).
+ instruction (only !TARGET_SIMD or big endian will get here).
For ldp/stp instructions, the offset is scaled for the size of a
single element of the pair. */
if (aarch64_advsimd_partial_struct_mode_p (mode)
return aarch64_offset_7bit_signed_scaled_p (TImode, offset);
/* Three 9/12 bit offsets checks because CImode will emit three
- ldr/str instructions (only big endian will get here). */
+ ldr/str instructions (only !TARGET_SIMD or big endian will
+ get here). */
if (aarch64_advsimd_partial_struct_mode_p (mode)
&& known_eq (GET_MODE_SIZE (mode), 24))
return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
/* Use aarch64_sve_reload_mem for SVE memory reloads that cannot use
LDR and STR. See the comment at the head of aarch64-sve.md for
more details about the big-endian handling. */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (reg_class_subset_p (rclass, FP_REGS)
&& !((REG_P (x) && HARD_REGISTER_P (x))
|| aarch64_simd_valid_immediate (x, NULL))
- && mode != VNx16QImode)
+ && mode != VNx16QImode
+ && (vec_flags & VEC_SVE_DATA)
+ && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
{
- unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if ((vec_flags & VEC_SVE_DATA)
- && ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
- {
- sri->icode = CODE_FOR_aarch64_sve_reload_mem;
- return NO_REGS;
- }
+ sri->icode = CODE_FOR_aarch64_sve_reload_mem;
+ return NO_REGS;
}
/* If we have to disable direct literal pool loads and stores because the
/* Without the TARGET_SIMD instructions we cannot move a Q register
to a Q register directly. We need a scratch. */
if (REG_P (x)
- && (mode == TFmode || mode == TImode || mode == TDmode)
+ && (mode == TFmode
+ || mode == TImode
+ || mode == TDmode
+ || (vec_flags == VEC_ADVSIMD && known_eq (GET_MODE_SIZE (mode), 16)))
&& mode == GET_MODE (x)
- && FP_REGNUM_P (REGNO (x)) && !TARGET_SIMD
+ && !TARGET_SIMD
+ && FP_REGNUM_P (REGNO (x))
&& reg_class_subset_p (rclass, FP_REGS))
{
sri->icode = code_for_aarch64_reload_mov (mode);
return NO_REGS;
}
+/* Implement TARGET_SECONDARY_MEMORY_NEEDED. */
+
+static bool
+aarch64_secondary_memory_needed (machine_mode mode, reg_class_t class1,
+ reg_class_t class2)
+{
+ if (!TARGET_SIMD
+ && reg_classes_intersect_p (class1, FP_REGS)
+ && reg_classes_intersect_p (class2, FP_REGS))
+ {
+ /* We can't do a 128-bit FPR-to-FPR move without TARGET_SIMD,
+ so we can't easily split a move involving tuples of 128-bit
+ vectors. Force the copy through memory instead.
+
+ (Tuples of 64-bit vectors are fine.) */
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ return true;
+ }
+ return false;
+}
+
static bool
aarch64_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
if (VECTOR_MODE_P (mode))
{
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the instruction have the
same cost as the three vector version - don't add the costs of the
{
/* SUBL2 and SUBW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the sub instruction
have the same cost as the regular three vector version -
{
/* ADDL2 and ADDW2. */
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
- if (vec_flags & VEC_ADVSIMD)
+ if (TARGET_SIMD && (vec_flags & VEC_ADVSIMD))
{
/* The select-operand-high-half versions of the add instruction
have the same cost as the regular three vector version -
return aarch64_register_move_cost (mode, from, GENERAL_REGS)
+ aarch64_register_move_cost (mode, GENERAL_REGS, to);
- if (known_eq (GET_MODE_SIZE (mode), 16))
+ unsigned int vec_flags = aarch64_classify_vector_mode (mode);
+ if (vec_flags != (VEC_ADVSIMD | VEC_STRUCT | VEC_PARTIAL)
+ && known_eq (GET_MODE_SIZE (mode), 16))
{
/* 128-bit operations on general registers require 2 instructions. */
if (from == GENERAL_REGS && to == GENERAL_REGS)
else if (to == GENERAL_REGS)
return regmove_cost->FP2GP;
+ if (!TARGET_SIMD && vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
+ {
+ /* Needs a round trip through memory, which can use LDP/STP for pairs.
+ The cost must be greater than 2 units to indicate that direct
+ moves aren't possible. */
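+ /* For example, load_fp = store_fp = 4 gives CEIL (4 + 4, 2) = 4. */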
+ auto per_vector = (aarch64_tune_params.memmov_cost.load_fp
+ + aarch64_tune_params.memmov_cost.store_fp);
+ return MIN (CEIL (per_vector, 2), 4);
+ }
+
return regmove_cost->FP2FP;
}
if (vec_flags == 0 || vec_flags == (VEC_ADVSIMD | VEC_STRUCT))
return false;
+ if ((vec_flags & VEC_ADVSIMD) && !TARGET_SIMD)
+ return false;
+
if (vec_flags & VEC_SVE_PRED)
return aarch64_sve_pred_valid_immediate (op, info);
std::swap (d->op0, d->op1);
}
- if ((d->vec_flags == VEC_ADVSIMD
+ if (((d->vec_flags == VEC_ADVSIMD && TARGET_SIMD)
|| d->vec_flags == VEC_SVE_DATA
|| d->vec_flags == (VEC_SVE_DATA | VEC_PARTIAL)
|| d->vec_flags == VEC_SVE_PRED)
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD aarch64_secondary_reload
+#undef TARGET_SECONDARY_MEMORY_NEEDED
+#define TARGET_SECONDARY_MEMORY_NEEDED aarch64_secondary_memory_needed
+
#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK aarch64_shift_truncation_mask