;; GCC machine description for CRIS cpu cores.
-;; Copyright (C) 1998-2021 Free Software Foundation, Inc.
+;; Copyright (C) 1998-2024 Free Software Foundation, Inc.
;; Contributed by Axis Communications.
;; This file is part of GCC.
[
;; Stack frame deallocation barrier.
CRIS_UNSPEC_FRAME_DEALLOC
-
- ;; Swap all 32 bits of the operand; 31 <=> 0, 30 <=> 1...
- CRIS_UNSPEC_SWAP_BITS
])
;; Register numbers.
[(CRIS_STATIC_CHAIN_REGNUM 7)
(CRIS_REAL_FP_REGNUM 8)
(CRIS_SP_REGNUM 14)
- (CRIS_ACR_REGNUM 15)
(CRIS_SRP_REGNUM 16)
(CRIS_MOF_REGNUM 17)
(CRIS_AP_REGNUM 18)
(not (match_test "dead_or_set_regno_p (insn, CRIS_SRP_REGNUM)")))
(nil) (nil)])
+;; Enable choosing particular instructions. The discriminator choice
+;; "v0" stands for "pre-v10", for brevity.
+;; Consumed by the "enabled" attribute to gate individual insn
+;; alternatives per CPU variant; "default" alternatives apply to all
+;; variants.
+(define_attr "cpu_variant" "default,v0,v10" (const_string "default"))
+
+;; An insn alternative is "enabled" only when condition-code handling
+;; is as expected ("cc_enabled" is "normal") AND its "cpu_variant"
+;; fits the target: "default" always fits, "v10" requires
+;; TARGET_HAS_MUL_INSNS and "v0" requires its absence.
(define_attr "enabled" "no,yes"
(if_then_else
- (eq_attr "cc_enabled" "normal")
+ (and
+ (eq_attr "cc_enabled" "normal")
+ (ior
+ (eq_attr "cpu_variant" "default")
+ (and (eq_attr "cpu_variant" "v10")
+ (match_test "TARGET_HAS_MUL_INSNS"))
+ (and (eq_attr "cpu_variant" "v0")
+ (not (match_test "TARGET_HAS_MUL_INSNS")))))
(const_string "yes")
(const_string "no")))
\f
+;; Mode iterators for size-generic patterns: WD iterates over
+;; SImode/HImode (CRIS ".d"/".w"), BW over HImode/QImode (".w"/".b").
(define_mode_iterator WD [SI HI])
(define_mode_iterator BW [HI QI])
+
+; Another "BW" for use where an independent iteration is needed.
+(define_mode_iterator BW2 [HI QI])
+
+;; <S>/<s>: the next-smaller mode, as upper-/lower-case mode name;
+;; <m>: the assembler size suffix for the mode.
(define_mode_attr S [(SI "HI") (HI "QI")])
(define_mode_attr s [(SI "hi") (HI "qi")])
(define_mode_attr m [(SI ".d") (HI ".w") (QI ".b")])
;; Ditto, commutative operators (i.e. not minus).
(define_code_iterator plusumin [plus umin])
+;; For opsplit1.
+(define_code_iterator splitop [and plus])
+
;; The addsubbo and nd code-attributes form a hack. We need to output
;; "addu.b", "subu.b" but "bound.b" (no "u"-suffix) which means we'd
;; need to refer to one iterator from the next. But, that can't be
emitted) is the final value. */
if ((CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_DOUBLE)
&& ! reload_completed
- && ! reload_in_progress)
+ && ! lra_in_progress)
{
rtx insns;
rtx op0 = operands[0];
(define_insn "*movsi_internal<setcc><setnz><setnzvc>"
[(set
(match_operand:SI 0 "nonimmediate_operand"
- "=r,r, r,Q>,r,Q>,g,r,r,g,rQ>,x, m,x")
+ "=r,r, r,Q>,r,Q>,g,r,r,g,rQ>,x, m,x, Q>,r,g")
(match_operand:SI 1 "general_operand"
- "r,Q>,M,M, I,r, M,n,g,r,x, rQ>,x,gi"))
+ "r,Q>,M,M, I,r, M,n,g,r,x, rQ>,x,gi,r, g,r"))
(clobber (reg:CC CRIS_CC0_REGNUM))]
- ;; Note that we prefer not to use the S alternative (if for some reason
- ;; it competes with others) above, but g matches S.
- ""
+ ;; Avoid matching insns we know must be reloaded. Without one
+ ;; operand being a (pseudo-)register, reload chooses
+ ;; reload-registers suboptimally.
+ "REG_S_P (operands[0]) || REG_S_P (operands[1]) || operands[1] == const0_rtx"
{
/* Better to have c-switch here; it is worth it to optimize the size of
move insns. The alternative would be to try to find more constraint
case 5:
case 8:
case 9:
+ case 14:
+ case 15:
+ case 16:
return "move.d %1,%0";
case 10:
gcc_unreachable ();
}
}
- [(set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,yes,yes,no,no")
+ [(set_attr "cpu_variant" "*,*,*,*,*,v0,*,*,v0,v0,*,*,*,*,v10,v10,v10")
+ (set_attr "slottable" "yes,yes,yes,yes,yes,yes,no,no,no,no,yes,yes,no,no,yes,no,no")
(set_attr "cc<cccc><ccnz><ccnzvc>"
- "*,*,none,none,*,none,none,*,*,none,none,none,none,none")])
+ "*,*,none,none,*,none,none,*,*,none,none,none,none,none,none,*,none")])
\f
;; FIXME: See movsi.
&& operands[1] != frame_pointer_rtx
&& CONST_INT_P (operands[3])
&& (INTVAL (operands[3]) == 2 || INTVAL (operands[3]) == 4)
- && (reload_in_progress || reload_completed)"
+ && (lra_in_progress || reload_completed)"
"#"
"&& 1"
[(set (match_dup 0)
{
rtx reg = operands[0];
rtx_insn *i = next_nonnote_nondebug_insn_bb (curr_insn);
+ rtx x, src, dest;
while (i != NULL_RTX && (!INSN_P (i) || DEBUG_INSN_P (i)))
i = next_nonnote_nondebug_insn_bb (i);
- if (i == NULL_RTX || reg_mentioned_p (reg, i) || BARRIER_P (i))
+ /* We don't want to strip the clobber if the next insn possibly uses the
+ zeroness of the result. Preferably fail only if we see a compare insn
+ that looks eliminable and with the register "reg" compared. With some
+ effort we could also check for an equality test (EQ, NE) in the post-split
+ user, just not for now. */
+ if (i == NULL_RTX)
+ FAIL;
+
+ x = single_set (i);
+
+ /* We explicitly need to bail on a BARRIER, but that's implied by a failing
+ single_set test. */
+ if (x == NULL_RTX)
FAIL;
+
+ src = SET_SRC (x);
+ dest = SET_DEST (x);
+
+ /* Bail on (post-split) eliminable compares. */
+ if (REG_P (dest) && REGNO (dest) == CRIS_CC0_REGNUM
+ && GET_CODE (src) == COMPARE)
+ {
+ rtx cop0 = XEXP (src, 0);
+
+ if (REG_P (cop0) && REGNO (cop0) == REGNO (reg)
+ && XEXP (src, 1) == const0_rtx)
+ FAIL;
+ }
+
+ /* Bail out if we see a (pre-split) cbranch or cstore where the comparison
+ looks eliminable and uses the destination register in this addition. We
+ don't need to look very deep: a single_set which is a parallel clobbers
+ something, and (one of) that something, is always CRIS_CC0_REGNUM here.
+ Also, the entities we're looking for are two-element parallels. A
+ split-up cbranch or cstore doesn't clobber CRIS_CC0_REGNUM. A cbranch has
+ if_then_else as its source with a comparison operator as the condition,
+ and a cstore has a source with the comparison operator directly. That
+ also matches dstep, so look for pc as destination for the if_then_else.
+ We error on the safe side if we happen to catch other conditional entities
+ and FAIL, that just means the split won't happen. */
+ if (GET_CODE (PATTERN (i)) == PARALLEL && XVECLEN (PATTERN (i), 0) == 2)
+ {
+ rtx cmp
+ = (GET_CODE (src) == IF_THEN_ELSE && dest == pc_rtx
+ ? XEXP (src, 0)
+ : (COMPARISON_P (src) ? src : NULL_RTX));
+ gcc_assert (cmp == NULL_RTX || COMPARISON_P (cmp));
+
+ if (cmp && REG_P (XEXP (cmp, 0)) && XEXP (cmp, 1) == const0_rtx
+ && REGNO (XEXP (cmp, 0)) == REGNO (reg))
+ FAIL;
+ }
})
(define_insn "<u>mul<s><mode>3"
(define_insn "cris_swap_bits"
[(set (match_operand:SI 0 "register_operand" "=r")
- (unspec:SI [(match_operand:SI 1 "register_operand" "0")]
- CRIS_UNSPEC_SWAP_BITS))
+ (bitreverse:SI (match_operand:SI 1 "register_operand" "0")))
(clobber (reg:CC CRIS_CC0_REGNUM))]
"TARGET_HAS_SWAP"
"swapwbr %0"
(match_operand:SI 1 "register_operand"))
(clobber (reg:CC CRIS_CC0_REGNUM))])
(parallel
- [(set (match_dup 2)
- (unspec:SI [(match_dup 2)] CRIS_UNSPEC_SWAP_BITS))
+ [(set (match_dup 2) (bitreverse:SI (match_dup 2)))
(clobber (reg:CC CRIS_CC0_REGNUM))])
(parallel
[(set (match_operand:SI 0 "register_operand")
= INTVAL (operands[2]) <= 0xff ? GEN_INT (0xff) : GEN_INT (0xffff);
})
+;; Avoid, after opsplit1 with AND (below), sequences of:
+;; lsrq N,R
+;; lslq M,R
+;; lsrq M,R
+;; (N < M), where we can fold the first lsrq into the lslq-lsrq, like:
+;; lslq M-N,R
+;; lsrq M,R
+;; We have to match this before opsplit1 below and before other peephole2s of
+;; lesser value, since peephole2 matching resumes at the first generated insn,
+;; and thus wouldn't match a pattern of the three shifts after opsplit1/AND.
+;; Note that this lsrandsplit1 is in turn of lesser value than movulsr, since
+;; that one doesn't require the same operand for source and destination, but
+;; they happen to be the same hard-register at peephole2 time even if
+;; naturally separated like in peep2-movulsr2.c, thus this placement. (Source
+;; and destination will be re-separated and the move optimized out in
+;; cprop_hardreg at time of this writing.)
+;; Testcase: gcc.target/cris/peep2-lsrandsplit1.c
+(define_peephole2 ; lsrandsplit1
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (lshiftrt:SI
+ (match_operand:SI 1 "register_operand")
+ (match_operand:SI 2 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_operand 3 "register_operand")
+ (and
+ (match_operand 4 "register_operand")
+ (match_operand 5 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; Same register throughout, and the lsrq shift count (operand 2) must
+ ;; be smaller than the number of high zero bits of the AND mask.
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && REGNO (operands[0]) == REGNO (operands[4])
+ && (INTVAL (operands[2])
+ < (clz_hwi (INTVAL (operands[5])) - (HOST_BITS_PER_WIDE_INT - 32)))
+ && cris_splittable_constant_p (INTVAL (operands[5]), AND, SImode,
+ optimize_function_for_speed_p (cfun)) == 2"
+ ;; We're guaranteed by the above clz_hwi test (certainly non-zero) and the
+ ;; test for a two-insn return-value from cris_splittable_constant_p, that
+ ;; the cris_splittable_constant_p AND-replacement would be lslq-lsrq.
+ ;; Operands 9 and 10 are fresh operand numbers, computed in the C body.
+ [(parallel
+ [(set (match_dup 0) (ashift:SI (match_dup 0) (match_dup 9)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (match_dup 10)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+{
+ HOST_WIDE_INT shiftval
+ = clz_hwi (INTVAL (operands[5])) - (HOST_BITS_PER_WIDE_INT - 32);
+ operands[9] = GEN_INT (shiftval - INTVAL (operands[2]));
+ operands[10] = GEN_INT (shiftval);
+})
+
;; Testcase for the following four peepholes: gcc.target/cris/peep2-xsrand.c
(define_peephole2 ; asrandb
operands[3] = gen_rtx_ZERO_EXTEND (SImode, op1);
operands[4] = GEN_INT (trunc_int_for_mode (INTVAL (operands[1]), QImode));
})
+
+;; Somewhat similar to andqu, but a different range and expansion,
+;; intended to feed the output into opsplit1 with AND:
+;; move.d 0x7ffff,$r10
+;; and.d $r11,$r10
+;; into:
+;; move.d $r11,$r10
+;; and.d 0x7ffff,$r10
+;; which opsplit1/AND will change into:
+;; move.d $r11,$r10 (unaffected by opsplit1/AND; shown only for context)
+;; lslq 13,$r10
+;; lsrq 13,$r10
+;; thereby winning in space, but in time only if the 0x7ffff happened to
+;; be unaligned in the code.
+(define_peephole2 ; movandsplit1
+ [(parallel
+ [(set (match_operand 0 "register_operand")
+ (match_operand 1 "const_int_operand"))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_operand 2 "register_operand")
+ (and (match_operand 3 "register_operand")
+ (match_operand 4 "register_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; Operands 0, 2 and 3 must be the same register, and the constant
+ ;; must be splittable, or there's nothing to gain from the swap.
+ "REGNO (operands[0]) == REGNO (operands[2])
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && cris_splittable_constant_p (INTVAL (operands[1]), AND,
+ GET_MODE (operands[2]),
+ optimize_function_for_speed_p (cfun))"
+ [(parallel
+ [(set (match_dup 2) (match_dup 4))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_dup 2) (match_dup 5))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+{
+ /* Operand 5 is a fresh operand: the AND of the destination register
+ with the original constant, for opsplit1 to pick apart.  */
+ operands[5] = gen_rtx_AND (GET_MODE (operands[2]), operands[2], operands[1]);
+})
+
+;; Large (read: non-quick) numbers can sometimes be AND:ed by other means.
+;; Testcase: gcc.target/cris/peep2-andsplit1.c
+;;
+;; Another case is add<ext> N,rx with -126..-64,64..126: it has the same
+;; size and execution time as two addq or subq, but addq and subq can fill
+;; a delay-slot.
+(define_peephole2 ; opsplit1
+ [(parallel
+ [(set (match_operand 0 "register_operand")
+ (splitop
+ (match_operand 1 "register_operand")
+ (match_operand 2 "const_int_operand")))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; Operands 0 and 1 can be separate identical objects, at least
+ ;; after matching peepholes above.
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && cris_splittable_constant_p (INTVAL (operands[2]), <CODE>,
+ GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun))"
+ ;; cris_split_constant emits the replacement insns itself; the
+ ;; [(const_int 0)] template plus DONE just discards the matched insn.
+ [(const_int 0)]
+{
+ cris_split_constant (INTVAL (operands[2]), <CODE>, GET_MODE (operands[0]),
+ optimize_function_for_speed_p (cfun),
+ true, operands[0], operands[0]);
+ DONE;
+})
+
+;; Fix a decomposed szext: fuse it with the memory operand of the
+;; load. This is typically the sign-extension part of a decomposed
+;; "indirect offset" address.
+;; ("szext" is presumably a code iterator over sign_extend/zero_extend
+;; defined elsewhere in this file -- not visible in this chunk.)
+(define_peephole2 ; lra_szext_decomposed
+ [(parallel
+ [(set (match_operand:BW 0 "register_operand")
+ (match_operand:BW 1 "memory_operand"))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_operand:SI 2 "register_operand") (szext:SI (match_dup 0)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; The intermediate register must either be the final destination or
+ ;; die here, since the fused replacement no longer sets it.
+ "REGNO (operands[0]) == REGNO (operands[2])
+ || peep2_reg_dead_p (2, operands[0])"
+ [(parallel
+ [(set (match_dup 2) (szext:SI (match_dup 1)))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])])
+
+;; Re-compose a decomposed "indirect offset" address for a szext
+;; operation. The non-clobbering "addi" is generated by LRA.
+;; This and lra_szext_decomposed is covered by cris/rld-legit1.c.
+;; ("szext" is presumably a code iterator over sign_extend/zero_extend
+;; defined elsewhere in this file -- not visible in this chunk.)
+(define_peephole2 ; lra_szext_decomposed_indirect_with_offset
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (mem:BW (match_operand:SI 1 "register_operand"))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (set (match_dup 0)
+ (plus:SI (match_dup 0) (match_operand:SI 2 "register_operand")))
+ (parallel
+ [(set (match_operand:SI 3 "register_operand")
+ (szext:SI (mem:BW2 (match_dup 0))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ ;; The intermediate register must either be the final destination or
+ ;; die here, since the fused replacement no longer sets it.
+ "(REGNO (operands[0]) == REGNO (operands[3])
+ || peep2_reg_dead_p (3, operands[0]))
+ && (REGNO (operands[0]) == REGNO (operands[1])
+ || peep2_reg_dead_p (3, operands[0]))"
+ [(parallel
+ [(set
+ (match_dup 3)
+ (szext:SI
+ ;; The inner extend must be a literal sign_extend, not the szext
+ ;; iterator: the matched first insn above is explicitly a
+ ;; sign_extend, and since a code iterator expands uniformly across
+ ;; the whole pattern, using szext here would make the zero_extend
+ ;; expansion wrongly zero-extend the intermediate loaded value.
+ (mem:BW2 (plus:SI (sign_extend:SI (mem:BW (match_dup 1))) (match_dup 2)))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])])
+
+;; Add operations with similar or same decomposed addresses here, when
+;; encountered - but only when covered by mentioned test-cases for at
+;; least one of the cases generalized in the pattern.
\f
;; Local variables:
;; mode:emacs-lisp