amdgcn: multi-size vector reductions

author Andrew Stubbs <ams@codesourcery.com>
Fri, 28 Oct 2022 11:38:43 +0000 (12:38 +0100)
committer Andrew Stubbs <ams@codesourcery.com>
Mon, 31 Oct 2022 12:20:52 +0000 (12:20 +0000)
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md

index 00c0e3be1ea433c8fd7b17f17a74f8db87f81ad0..6274d2e922802b3ee912e6355a7f703cc96c7a87 100644 (file)
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -32,11 +32,6 @@
  (define_mode_iterator V_DF
                       [V2DF V4DF V8DF V16DF V32DF V64DF])
  
-(define_mode_iterator V64_SI
-                     [V64SI])
-(define_mode_iterator V64_DI
-                     [V64DI])
-
  ; Vector modes for sub-dword modes
  (define_mode_iterator V_QIHI
                       [V2QI V2HI
@@ -77,13 +72,6 @@
                        V32HF V32SF
                        V64HF V64SF])
  
-; V64_* modes are for where more general support is unimplemented
-; (e.g. reductions)
-(define_mode_iterator V64_1REG
-                     [V64QI V64HI V64SI V64HF V64SF])
-(define_mode_iterator V64_INT_1REG
-                     [V64QI V64HI V64SI])
-
  ; Vector modes for two vector registers
  (define_mode_iterator V_2REG
                       [V2DI V2DF
@@ -93,9 +81,6 @@
                        V32DI V32DF
                        V64DI V64DF])
  
-(define_mode_iterator V64_2REG
-                     [V64DI V64DF])
-
  ; Vector modes with native support
  (define_mode_iterator V_noQI
                       [V2HI V2HF V2SI V2SF V2DI V2DF
@@ -158,11 +143,6 @@
                        V32HF V32SF V32DF
                        V64HF V64SF V64DF])
  
-(define_mode_iterator V64_ALL
-                     [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
-(define_mode_iterator V64_FP
-                     [V64HF V64SF V64DF])
-
  (define_mode_attr scalar_mode
    [(V2QI "qi") (V2HI "hi") (V2SI "si")
     (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
@@ -3528,15 +3508,16 @@
  (define_expand "reduc_<reduc_op>_scal_<mode>"
    [(set (match_operand:<SCALAR_MODE> 0 "register_operand")
         (unspec:<SCALAR_MODE>
-         [(match_operand:V64_ALL 1 "register_operand")]
+         [(match_operand:V_ALL 1 "register_operand")]
           REDUC_UNSPEC))]
    ""
    {
      rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
                                        <reduc_unspec>);
  
-    /* The result of the reduction is in lane 63 of tmp.  */
-    emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
+    rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+    emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
+                                                  last_lane));
  
      DONE;
    })
@@ -3547,7 +3528,7 @@
  (define_expand "fold_left_plus_<mode>"
   [(match_operand:<SCALAR_MODE> 0 "register_operand")
    (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
-  (match_operand:V64_FP 2 "gcn_alu_operand")]
+  (match_operand:V_FP 2 "gcn_alu_operand")]
    "can_create_pseudo_p ()
     && (flag_openacc || flag_openmp
         || flag_associative_math)"
@@ -3563,11 +3544,11 @@
     })
  
  (define_insn "*<reduc_op>_dpp_shr_<mode>"
-  [(set (match_operand:V64_1REG 0 "register_operand"   "=v")
-       (unspec:V64_1REG
-         [(match_operand:V64_1REG 1 "register_operand" "v")
-          (match_operand:V64_1REG 2 "register_operand" "v")
-          (match_operand:SI 3 "const_int_operand"      "n")]
+  [(set (match_operand:V_1REG 0 "register_operand"   "=v")
+       (unspec:V_1REG
+         [(match_operand:V_1REG 1 "register_operand" "v")
+          (match_operand:V_1REG 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"        "n")]
           REDUC_UNSPEC))]
    ; GCN3 requires a carry out, GCN5 not
    "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
@@ -3580,11 +3561,11 @@
     (set_attr "length" "8")])
  
  (define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
-  [(set (match_operand:V64_DI 0 "register_operand"    "=v")
-       (unspec:V64_DI
-         [(match_operand:V64_DI 1 "register_operand" "v")
-          (match_operand:V64_DI 2 "register_operand" "v")
-          (match_operand:SI 3 "const_int_operand"    "n")]
+  [(set (match_operand:V_DI 0 "register_operand"    "=v")
+       (unspec:V_DI
+         [(match_operand:V_DI 1 "register_operand" "v")
+          (match_operand:V_DI 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"  "n")]
           REDUC_2REG_UNSPEC))]
    ""
    "#"
@@ -3609,10 +3590,10 @@
  ; Special cases for addition.
  
  (define_insn "*plus_carry_dpp_shr_<mode>"
-  [(set (match_operand:V64_INT_1REG 0 "register_operand"   "=v")
-       (unspec:V64_INT_1REG
-         [(match_operand:V64_INT_1REG 1 "register_operand" "v")
-          (match_operand:V64_INT_1REG 2 "register_operand" "v")
+  [(set (match_operand:V_INT_1REG 0 "register_operand"   "=v")
+       (unspec:V_INT_1REG
+         [(match_operand:V_INT_1REG 1 "register_operand" "v")
+          (match_operand:V_INT_1REG 2 "register_operand" "v")
            (match_operand:SI 3 "const_int_operand"        "n")]
           UNSPEC_PLUS_CARRY_DPP_SHR))
     (clobber (reg:DI VCC_REG))]
@@ -3626,12 +3607,12 @@
     (set_attr "length" "8")])
  
  (define_insn "*plus_carry_in_dpp_shr_<mode>"
-  [(set (match_operand:V64_SI 0 "register_operand"    "=v")
-       (unspec:V64_SI
-         [(match_operand:V64_SI 1 "register_operand" "v")
-          (match_operand:V64_SI 2 "register_operand" "v")
-          (match_operand:SI 3 "const_int_operand"    "n")
-          (match_operand:DI 4 "register_operand"     "cV")]
+  [(set (match_operand:V_SI 0 "register_operand"    "=v")
+       (unspec:V_SI
+         [(match_operand:V_SI 1 "register_operand" "v")
+          (match_operand:V_SI 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"  "n")
+          (match_operand:DI 4 "register_operand"   "cV")]
           UNSPEC_PLUS_CARRY_IN_DPP_SHR))
     (clobber (reg:DI VCC_REG))]
    ""
@@ -3644,11 +3625,11 @@
     (set_attr "length" "8")])
  
  (define_insn_and_split "*plus_carry_dpp_shr_<mode>"
-  [(set (match_operand:V64_DI 0 "register_operand"    "=v")
-       (unspec:V64_DI
-         [(match_operand:V64_DI 1 "register_operand" "v")
-          (match_operand:V64_DI 2 "register_operand" "v")
-          (match_operand:SI 3 "const_int_operand"    "n")]
+  [(set (match_operand:V_DI 0 "register_operand"    "=v")
+       (unspec:V_DI
+         [(match_operand:V_DI 1 "register_operand" "v")
+          (match_operand:V_DI 2 "register_operand" "v")
+          (match_operand:SI 3 "const_int_operand"  "n")]
           UNSPEC_PLUS_CARRY_DPP_SHR))
     (clobber (reg:DI VCC_REG))]
    ""
@@ -3675,38 +3656,6 @@
    [(set_attr "type" "vmult")
     (set_attr "length" "16")])
  
-; Instructions to move a scalar value from lane 63 of a vector register.
-(define_insn "mov_from_lane63_<mode>"
-  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
-       (unspec:<SCALAR_MODE>
-         [(match_operand:V64_1REG 1 "register_operand"   "  v,v")]
-         UNSPEC_MOV_FROM_LANE63))]
-  ""
-  "@
-   v_readlane_b32\t%0, %1, 63
-   v_mov_b32\t%0, %1 wave_ror:1"
-  [(set_attr "type" "vop3a,vop_dpp")
-   (set_attr "exec" "none,*")
-   (set_attr "length" "8")])
-
-(define_insn "mov_from_lane63_<mode>"
-  [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
-       (unspec:<SCALAR_MODE>
-         [(match_operand:V64_2REG 1 "register_operand"   "  v,v")]
-         UNSPEC_MOV_FROM_LANE63))]
-  ""
-  "@
-   v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
-   * if (REGNO (operands[0]) <= REGNO (operands[1]))   \
-       return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\"     \
-             \"v_mov_b32\t%H0, %H1 wave_ror:1\";       \
-     else                                              \
-       return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\"     \
-             \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
-  [(set_attr "type" "vop3a,vop_dpp")
-   (set_attr "exec" "none,*")
-   (set_attr "length" "8")])
-
  ;; }}}
  ;; {{{ Miscellaneous
  
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc

index a561976d7f54235eb01b1db25eb8c5b134276948..b9d9170f167650dd336f616737b8760c950153f4 100644 (file)
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4918,23 +4918,25 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
  
     The vector register SRC of mode MODE is reduced using the operation given
     by UNSPEC, and the scalar result is returned in lane 63 of a vector
-   register.  */
-/* FIXME: Implement reductions for sizes other than V64.
-          (They're currently disabled in the machine description.)  */
+   register (or lane 31, 15, 7, 3, 1 for partial vectors).  */
  
  rtx
  gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
  {
    machine_mode orig_mode = mode;
+  machine_mode scalar_mode = GET_MODE_INNER (mode);
+  int vf = GET_MODE_NUNITS (mode);
    bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
+                     || unspec == UNSPEC_SMIN_DPP_SHR
                       || unspec == UNSPEC_SMAX_DPP_SHR
                       || unspec == UNSPEC_UMIN_DPP_SHR
                       || unspec == UNSPEC_UMAX_DPP_SHR)
-                    && (mode == V64DImode
-                        || mode == V64DFmode))
+                    && (scalar_mode == DImode
+                        || scalar_mode == DFmode))
                     || (unspec == UNSPEC_PLUS_DPP_SHR
-                       && mode == V64DFmode));
+                       && scalar_mode == DFmode));
    rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
+                  : unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
                    : unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
                    : unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
                    : unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
@@ -4944,23 +4946,23 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
                        || unspec == UNSPEC_SMAX_DPP_SHR
                        || unspec == UNSPEC_UMIN_DPP_SHR
                        || unspec == UNSPEC_UMAX_DPP_SHR)
-                     && (mode == V64QImode
-                         || mode == V64HImode));
+                     && (scalar_mode == QImode
+                         || scalar_mode == HImode));
    bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
                     || unspec == UNSPEC_UMAX_DPP_SHR);
    bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
                         && GET_MODE_CLASS (mode) == MODE_VECTOR_INT
-                       && (TARGET_GCN3 || mode == V64DImode);
+                       && (TARGET_GCN3 || scalar_mode == DImode);
  
    if (use_plus_carry)
      unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
  
    if (use_extends)
      {
-      rtx tmp = gen_reg_rtx (V64SImode);
+      mode = VnMODE (vf, SImode);
+      rtx tmp = gen_reg_rtx (mode);
        convert_move (tmp, src, unsignedp);
        src = tmp;
-      mode = V64SImode;
      }
  
    /* Perform reduction by first performing the reduction operation on every
@@ -4968,7 +4970,8 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
       iteration (thereby effectively reducing every 4 lanes) and so on until
       all lanes are reduced.  */
    rtx in, out = force_reg (mode, src);
-  for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
+  int iterations = exact_log2 (vf);
+  for (int i = 0, shift = 1; i < iterations; i++, shift <<= 1)
      {
        rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
        in = out;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md

index a3c9523cd6dc6e31c3ba1ab1ce2b77965e4f4e61..6c1a438f9d1a0b82191f92bf49fd66d1b7f2a4dd 100644 (file)
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -78,7 +78,6 @@
    UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
    UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
    UNSPEC_MOV_DPP_SHR
-  UNSPEC_MOV_FROM_LANE63
    UNSPEC_GATHER
    UNSPEC_SCATTER
    UNSPEC_RCP
author	Andrew Stubbs <ams@codesourcery.com>
	Fri, 28 Oct 2022 11:38:43 +0000 (12:38 +0100)
committer	Andrew Stubbs <ams@codesourcery.com>
	Mon, 31 Oct 2022 12:20:52 +0000 (12:20 +0000)
gcc/config/gcn/gcn-valu.md		patch | blob | blame | history
gcc/config/gcn/gcn.cc		patch | blob | blame | history
gcc/config/gcn/gcn.md		patch | blob | blame | history