git.ipfire.org Git - thirdparty/gcc.git/commitdiff
AVR: PR117726 - Tweak 32-bit logical shifts of 25...30 for -Oz.
author Georg-Johann Lay <avr@gjlay.de>
Wed, 22 Jan 2025 20:11:22 +0000 (21:11 +0100)
committer Georg-Johann Lay <avr@gjlay.de>
Thu, 23 Jan 2025 09:13:42 +0000 (10:13 +0100)
As it turns out, logical 32-bit shifts with an offset of 25..30 can
be performed in 7 instructions or fewer.  This beats the 7 instructions
required for the default code of a shift loop.
Plus, with zero overhead, these cases can be 3-operand.

This is only relevant for -Oz because with -Os, 3op shifts are
split with -msplit-bit-shift (which is not performed with -Oz).

PR target/117726
gcc/
* config/avr/avr.cc (avr_ld_regno_p): New function.
(ashlsi3_out) [case 25,26,27,28,29,30]: Handle and tweak.
(lshrsi3_out): Same.
(avr_rtx_costs_1) [SImode, ASHIFT, LSHIFTRT]: Adjust costs.
* config/avr/avr.md (ashlsi3, *ashlsi3, *ashlsi3_const):
Add "r,r,C4L" alternative.
(lshrsi3, *lshrsi3, *lshrsi3_const): Add "r,r,C4R" alternative.
* config/avr/constraints.md (C4R, C4L): New.
gcc/testsuite/
* gcc.target/avr/torture/avr-torture.exp (AVR_TORTURE_OPTIONS):
Turn one option variant into -Oz.

gcc/config/avr/avr.cc
gcc/config/avr/avr.md
gcc/config/avr/constraints.md
gcc/testsuite/gcc.target/avr/torture/avr-torture.exp

index e5a5aa34ec04b73592f728a02fb4d53f05490afd..8628a438ab56a7d36fde7085062d38d7f6ea95ee 100644 (file)
@@ -418,6 +418,15 @@ avr_adiw_reg_p (rtx reg)
 }
 
 
+/* Return true iff REGNO is in R16...R31.  */
+
+static bool
+avr_ld_regno_p (int regno)
+{
+  return TEST_HARD_REG_CLASS (LD_REGS, regno);
+}
+
+
 static bool
 ra_in_progress ()
 {
@@ -7397,17 +7406,20 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+                       && XVECLEN (PATTERN (insn), 0) == 3
+                       && REG_P (operands[3]));
       if (plen)
        *plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
        {
        default:
-         if (INTVAL (operands[2]) < 32)
+         if (off < 32)
            break;
 
          return AVR_HAVE_MOVW
@@ -7461,11 +7473,58 @@ ashlsi3_out (rtx_insn *insn, rtx operands[], int *plen)
                           "mov %D0,%B1"  CR_TAB
                           "clr %B0"      CR_TAB
                           "clr %A0", operands, plen, 4);
+       case 30:
+         if (AVR_HAVE_MUL && scratch_p)
+           return avr_asm_len ("ldi %3,1<<6"       CR_TAB
+                               "mul %3,%A1"        CR_TAB
+                               "mov %D0,r0"        CR_TAB
+                               "clr __zero_reg__"  CR_TAB
+                               "clr %C0"           CR_TAB
+                               "clr %B0"           CR_TAB
+                               "clr %A0", operands, plen, 7);
+         // Fallthrough
+
+       case 28:
+       case 29:
+         {
+           const bool ld_reg0_p = avr_ld_regno_p (reg0 + 3); // %D0
+           const bool ld_reg1_p = avr_ld_regno_p (reg1 + 0); // %A1
+           if (ld_reg0_p
+               || (ld_reg1_p && reg1_unused_after)
+               || scratch_p)
+             {
+               if (ld_reg0_p)
+                 avr_asm_len ("mov %D0,%A1"    CR_TAB
+                              "swap %D0"       CR_TAB
+                              "andi %D0,0xf0", operands, plen, 3);
+               else if (ld_reg1_p && reg1_unused_after)
+                 avr_asm_len ("swap %A1"       CR_TAB
+                              "andi %A1,0xf0"  CR_TAB
+                              "mov %D0,%A1", operands, plen, 3);
+               else
+                 avr_asm_len ("mov %D0,%A1"    CR_TAB
+                              "swap %D0"       CR_TAB
+                              "ldi %3,0xf0"    CR_TAB
+                              "and %D0,%3", operands, plen, 4);
+               for (int i = 28; i < off; ++i)
+                 avr_asm_len ("lsl %D0", operands, plen, 1);
+               return avr_asm_len ("clr %C0"  CR_TAB
+                                   "clr %B0"  CR_TAB
+                                   "clr %A0", operands, plen, 3);
+             }
+         }
+         // Fallthrough
+
        case 24:
-         return avr_asm_len ("mov %D0,%A1"  CR_TAB
-                             "clr %C0"      CR_TAB
+       case 25:
+       case 26:
+       case 27:
+         avr_asm_len ("mov %D0,%A1", operands, plen, 1);
+         for (int i = 24; i < off; ++i)
+           avr_asm_len ("lsl %D0", operands, plen, 1);
+         return avr_asm_len ("clr %C0"      CR_TAB
                              "clr %B0"      CR_TAB
-                             "clr %A0", operands, plen, 4);
+                             "clr %A0", operands, plen, 3);
        case 31:
          return AVR_HAVE_MOVW
            ? avr_asm_len ("bst %A1,0"    CR_TAB
@@ -8298,17 +8357,20 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
 {
   if (CONST_INT_P (operands[2]))
     {
+      int off = INTVAL (operands[2]);
       int reg0 = true_regnum (operands[0]);
       int reg1 = true_regnum (operands[1]);
       bool reg1_unused_after = reg_unused_after (insn, operands[1]);
-
+      bool scratch_p = (GET_CODE (PATTERN (insn)) == PARALLEL
+                       && XVECLEN (PATTERN (insn), 0) == 3
+                       && REG_P (operands[3]));
       if (plen)
        *plen = 0;
 
-      switch (INTVAL (operands[2]))
+      switch (off)
        {
        default:
-         if (INTVAL (operands[2]) < 32)
+         if (off < 32)
            break;
 
          return AVR_HAVE_MOVW
@@ -8362,11 +8424,58 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *plen)
                           "mov %A0,%C1" CR_TAB
                           "clr %C0"     CR_TAB
                           "clr %D0", operands, plen, 4);
+       case 30:
+         if (AVR_HAVE_MUL && scratch_p)
+           return avr_asm_len ("ldi %3,1<<2"       CR_TAB
+                               "mul %3,%D1"        CR_TAB
+                               "mov %A0,r1"        CR_TAB
+                               "clr __zero_reg__"  CR_TAB
+                               "clr %B0"           CR_TAB
+                               "clr %C0"           CR_TAB
+                               "clr %D0", operands, plen, 7);
+         // Fallthrough
+
+       case 29:
+       case 28:
+         {
+           const bool ld_reg0_p = avr_ld_regno_p (reg0 + 0); // %A0
+           const bool ld_reg1_p = avr_ld_regno_p (reg1 + 3); // %D1
+           if (ld_reg0_p
+               || (ld_reg1_p && reg1_unused_after)
+               || scratch_p)
+             {
+               if (ld_reg0_p)
+                 avr_asm_len ("mov %A0,%D1"    CR_TAB
+                              "swap %A0"       CR_TAB
+                              "andi %A0,0x0f", operands, plen, 3);
+               else if (ld_reg1_p && reg1_unused_after)
+                 avr_asm_len ("swap %D1"       CR_TAB
+                              "andi %D1,0x0f"  CR_TAB
+                              "mov %A0,%D1", operands, plen, 3);
+               else
+                 avr_asm_len ("mov %A0,%D1"    CR_TAB
+                              "swap %A0"       CR_TAB
+                              "ldi %3,0x0f"    CR_TAB
+                              "and %A0,%3", operands, plen, 4);
+               for (int i = 28; i < off; ++i)
+                 avr_asm_len ("lsr %A0", operands, plen, 1);
+               return avr_asm_len ("clr %B0"  CR_TAB
+                                   "clr %C0"  CR_TAB
+                                   "clr %D0", operands, plen, 3);
+             }
+         }
+         // Fallthrough
+
+       case 27:
+       case 26:
+       case 25:
        case 24:
-         return avr_asm_len ("mov %A0,%D1" CR_TAB
-                             "clr %B0"     CR_TAB
+         avr_asm_len ("mov %A0,%D1", operands, plen, 1);
+         for (int i = 24; i < off; ++i)
+           avr_asm_len ("lsr %A0", operands, plen, 1);
+         return avr_asm_len ("clr %B0"     CR_TAB
                              "clr %C0"     CR_TAB
-                             "clr %D0", operands, plen, 4);
+                             "clr %D0", operands, plen, 3);
        case 31:
          return AVR_HAVE_MOVW
            ? avr_asm_len ("bst %D1,7"    CR_TAB
@@ -13037,9 +13146,6 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 0:
                *total = 0;
                break;
-             case 24:
-               *total = COSTS_N_INSNS (3);
-               break;
              case 1:
              case 8:
                *total = COSTS_N_INSNS (4);
@@ -13050,6 +13156,19 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 16:
                *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
                break;
+             case 24:
+             case 25:
+             case 26:
+             case 27:
+               *total = COSTS_N_INSNS (4 + val1 - 24);
+               break;
+             case 28:
+             case 29:
+               *total = COSTS_N_INSNS (6 + val1 - 28);
+               break;
+             case 30:
+               *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
+               break;
              case 31:
                *total = COSTS_N_INSNS (6);
                break;
@@ -13346,6 +13465,7 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
                *total = 0;
                break;
              case 1:
+             case 8:
                *total = COSTS_N_INSNS (4);
                break;
              case 2:
@@ -13357,9 +13477,18 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
              case 16:
                *total = COSTS_N_INSNS (4 - AVR_HAVE_MOVW);
                break;
-             case 8:
              case 24:
-               *total = COSTS_N_INSNS (4);
+             case 25:
+             case 26:
+             case 27:
+               *total = COSTS_N_INSNS (4 + val1 - 24);
+               break;
+             case 28:
+             case 29:
+               *total = COSTS_N_INSNS (6 + val1 - 28);
+               break;
+             case 30:
+               *total = COSTS_N_INSNS (!speed && AVR_HAVE_MUL ? 7 : 8);
                break;
              case 31:
                *total = COSTS_N_INSNS (6);
index 594940c67819991d928a5b08bd48086211d4e553..6550fadd101710a18aaf6fb894166984cd898290 100644 (file)
 ;; "ashlsq3"  "ashlusq3"
 ;; "ashlsa3"  "ashlusa3"
 (define_insn_and_split "ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r        ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4l,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4L,C4l,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*ashl<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r        ,r  ,r,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4l,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                "=r,r  ,r    ,r  ,r,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                     (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4L,C4l,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
 ;; "*ashlsq3_const"  "*ashlusq3_const"
 ;; "*ashlsa3_const"  "*ashlusa3_const"
 (define_insn "*ashl<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r        ,r  ,r")
-        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        ,r  ,0")
-                     (match_operand:QI 2 "const_int_operand"  "LP,O C15 C31,C4l,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X        ,&d ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"             "=r ,r    ,r  ,r")
+        (ashift:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  ,0")
+                     (match_operand:QI 2 "const_int_operand"  "LP,O C4L,C4l,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"    "=X ,X    ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
 ;; "lshrsq3"  "lshrusq3"
 ;; "lshrsa3"  "lshrusa3"
 (define_insn_and_split "lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r        ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4r,n,Qm")))]
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    ,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4R,C4r,n,Qm")))]
   ""
   "#"
   "&& reload_completed"
   [(set_attr "isa" "*,*,*,3op,*,*")])
 
 (define_insn "*lshr<mode>3"
-  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r        ,r  ,r,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r        ,r  ,0,0")
-                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C15 C31,C4r,n,Qm")))
+  [(set (match_operand:ALL4 0 "register_operand"                  "=r,r  ,r    ,r  ,r,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand"    "0,0  ,r    ,r  ,0,0")
+                       (match_operand:QI 2 "nop_general_operand"   "r,LPK,O C4R,C4r,n,Qm")))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
 ;; "*lshrsq3_const"  "*lshrusq3_const"
 ;; "*lshrsa3_const"  "*lshrusa3_const"
 (define_insn "*lshr<mode>3_const"
-  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r        ,r  ,r")
-        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r        ,r  ,0")
-                       (match_operand:QI 2 "const_int_operand"  "LP,O C15 C31,C4r,n")))
-   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X        ,&d ,&d"))
+  [(set (match_operand:ALL4 0 "register_operand"               "=r ,r    ,r  ,r")
+        (lshiftrt:ALL4 (match_operand:ALL4 1 "register_operand" "0 ,r    ,r  ,0")
+                       (match_operand:QI 2 "const_int_operand"  "LP,O C4R,C4r,n")))
+   (clobber (match_operand:QI 3 "scratch_or_dreg_operand"      "=X ,X    ,&d ,&d"))
    (clobber (reg:CC REG_CC))]
   "reload_completed"
   {
index fc8d4d56a66696149f55aa12029c9e5a84a30c3e..2ca9cc3d88dde68a820f480f1440b16030d657b0 100644 (file)
   (and (match_code "const_int")
        (match_test "avr_split_shift_p (4, ival, ASHIFT)")))
 
+(define_constraint "C4R"
+  "A constant integer shift offset for a 4-byte LSHIFTRT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
+
+(define_constraint "C4L"
+  "A constant integer shift offset for a 4-byte ASHIFT that's a 3-operand insn independent of options."
+  (and (match_code "const_int")
+       (match_test "ival == 15 || IN_RANGE (ival, 25, 31)")))
 
 ;; CONST_FIXED is no element of 'n' so cook our own.
 ;; "i" or "s" would match but because the insn uses iterators that cover
index 158ef2d4c5abdf2e12f3834f675d88a04f65aef7..91bbe26f10b33b2cb19402077b703a724d197c0c 100644 (file)
@@ -45,7 +45,7 @@ dg-init
        { -Os -fomit-frame-pointer } \
        { -Os -fomit-frame-pointer -finline-functions } \
        { -O3 -g } \
-       { -Os -mcall-prologues} ]
+       { -Oz -mcall-prologues} ]
 
 
 #Initialize use of torture lists.