re PR target/14471 (Workaround for errata regarding multiplication bug)

author Hans-Peter Nilsson <hp@axis.com>

Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)

committer Hans-Peter Nilsson <hp@gcc.gnu.org>

Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)
author Hans-Peter Nilsson <hp@axis.com>
Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)
committer Hans-Peter Nilsson <hp@gcc.gnu.org>
Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index ba7b868603a3705a94ae2538ff6cf36c790ffdca..ff9cb6461080694dfd5bf00057df367012f7996e 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2004-03-08  Hans-Peter Nilsson  <hp@axis.com>
+
+       PR target/14471
+       * doc/invoke.texi (CRIS Options): Document -mmul-bug-workaround
+       and -mno-mul-bug-workaround.
+       * config/cris/cris.md ("smulsi3_highpart", "umulsi3_highpart")
+       ("mulsidi3", "umulsidi3"): Prefix output template with "%!".
+       ("umulhisi3", "umulqihi3", "mulsi3", "mulqihi3", "mulhisi3"):
+       Ditto.  Make attribute "slottable" dependent on TARGET_MUL_BUG.
+       * config/cris/mulsi3.asm (__Mul) [__CRIS_arch_version >= 10]: Make
+       sure mulu.d is not last on cache-line.
+       * config/cris/cris.h (ASM_SPEC): Translate -mno-mul-bug-workaround
+       into -no-mul-bug-abort depending on HAVE_AS_MUL_BUG_ABORT_OPTION.
+       (TARGET_MASK_MUL_BUG, TARGET_MUL_BUG): New macros.
+       (TARGET_SWITCHES): New options -mmul-bug-workaround and
+       -mno-mul-bug-workaround.
+       (TARGET_DEFAULT): Include TARGET_MASK_MUL_BUG.
+       (PRINT_OPERAND_PUNCT_VALID_P): Include '!'.
+       * config/cris/cris.c (cris_operand_extend_operator): Clarify
+       relation to MULT in head comment.
+       (cris_op_str): Abort for MULT.
+       (cris_print_operand) <case '!'>: New case.
+
  2004-03-06  Richard Henderson  <rth@redhat.com>
  
          * config/alpha/alpha.c (alpha_in_small_data_p): False for functions.
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c

index 9973c7b9ed162a7d9ccc71480947b00a387db220..2e2970e23a169630b6d63464ba9b7459256f365a 100644 (file)
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -312,7 +312,10 @@ cris_commutative_orth_op (x, mode)
            || code == IOR || code == AND || code == UMIN));
  }
  
-/* Check if MODE is same as mode for X, and X is PLUS or MINUS or UMIN.  */
+/* Check if MODE is same as mode for X, and X is PLUS or MINUS or UMIN.
+   By the name, you might think we should include MULT.  We don't because
+   it doesn't accept the same addressing modes as the others (only
+   registers) and there's also the problem of handling TARGET_MUL_BUG.  */
  
  int
  cris_operand_extend_operator (x, mode)
@@ -496,7 +499,11 @@ cris_op_str (x)
        break;
  
      case MULT:
-      return "mul";
+      /* This function is for retrieving a part of an instruction name for
+        an operator, for immediate output.  If that ever happens for
+        MULT, we need to apply TARGET_MUL_BUG in the caller.  Make sure
+        we notice.  */
+      abort ();
        break;
  
      case DIV:
@@ -1401,6 +1408,23 @@ cris_print_operand (file, x, code)
         fputs ("\n\tnop", file);
        return;
  
+    case '!':
+      /* Output directive for alignment padded with "nop" insns.
+        Optimizing for size, it's plain 4-byte alignment, otherwise we
+        align the section to a cache-line (32 bytes) and skip at most 2
+        bytes, i.e. we skip if it's the last insn on a cache-line.  The
+        latter is faster by a small amount (for two test-programs 99.6%
+        and 99.9%) and larger by a small amount (ditto 100.1% and
+        100.2%).  This is supposed to be the simplest yet performance-
+        wise least intrusive way to make sure the immediately following
+        (supposed) muls/mulu insn isn't located at the end of a
+        cache-line.  */
+      if (TARGET_MUL_BUG)
+       fputs (optimize_size
+              ? ".p2alignw 2,0x050f\n\t"
+              : ".p2alignw 5,0x050f,2\n\t", file);
+      return;
+
      case 'H':
        /* Print high (most significant) part of something.  */
        switch (GET_CODE (operand))
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h

index 2783aecc85107b03499130bed90fbf9a8ba50508..10ae4aa35872235006d5c5b3ae8df43c5fe8e818 100644 (file)
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -170,9 +170,17 @@ extern const char *cris_elinux_stacksize_str;
     %{!melinux:%{!maout|melf:%{!fno-vtable-gc:-fvtable-gc}}}}}".  */
  #define CC1PLUS_SPEC ""
  
+#ifdef HAVE_AS_MUL_BUG_ABORT_OPTION
+#define MAYBE_AS_NO_MUL_BUG_ABORT \
+ "%{mno-mul-bug-workaround:-no-mul-bug-abort} "
+#else
+#define MAYBE_AS_NO_MUL_BUG_ABORT
+#endif
+
  /* Override previous definitions (linux.h).  */
  #undef ASM_SPEC
  #define ASM_SPEC \
+ MAYBE_AS_NO_MUL_BUG_ABORT \
   "%{v:-v}\
    %(asm_subtarget)"
  
@@ -331,8 +339,34 @@ extern int target_flags;
  #define TARGET_MASK_AVOID_GOTPLT 8192
  #define TARGET_AVOID_GOTPLT (target_flags & TARGET_MASK_AVOID_GOTPLT)
  
+/* Whether or not to work around multiplication instruction hardware bug
+   when generating code for models where it may be present.  From the
+   trouble report for Etrax 100 LX: "A multiply operation may cause
+   incorrect cache behaviour under some specific circumstances. The
+   problem can occur if the instruction following the multiply instruction
+   causes a cache miss, and multiply operand 1 (source operand) bits
+   [31:27] matches the logical mapping of the mode register address
+   (0xb0....), and bits [9:2] of operand 1 matches the TLB register
+   address (0x258-0x25f).  There is such a mapping in kernel mode or when
+   the MMU is off.  Normally there is no such mapping in user mode, and
+   the problem will therefore probably not occur in Linux user mode
+   programs."
+
+   We have no sure-fire way to know from within GCC that we're compiling a
+   user program.  For example, -fpic/PIC is used in libgcc which is linked
+   into the kernel.  However, the option -mno-mul-bug-workaround can be
+   safely used per-package when compiling programs.  The same goes for
+   general user-only libraries such as glibc, since there's no user-space
+   driver-like program that gets a mapping of I/O registers (all on the
+   same page, including the TLB registers).  */
+#define TARGET_MASK_MUL_BUG 16384
+#define TARGET_MUL_BUG (target_flags & TARGET_MASK_MUL_BUG)
+
  #define TARGET_SWITCHES                                                        \
   {                                                                     \
+  {"mul-bug-workaround",                TARGET_MASK_MUL_BUG,           \
+   N_("Work around bug in multiplication instruction")},               \
+  {"no-mul-bug-workaround",            -TARGET_MASK_MUL_BUG, ""},      \
    /* No "no-etrax" as it does not really imply any model.              \
       On the other hand, "etrax" implies the common (and large)         \
       subset matching all models.  */                                   \
@@ -410,7 +444,7 @@ extern int target_flags;
  # define TARGET_DEFAULT \
   (TARGET_MASK_SIDE_EFFECT_PREFIXES + TARGET_MASK_STACK_ALIGN \
    + TARGET_MASK_CONST_ALIGN + TARGET_MASK_DATA_ALIGN \
-  + TARGET_MASK_PROLOGUE_EPILOGUE)
+  + TARGET_MASK_PROLOGUE_EPILOGUE + TARGET_MASK_MUL_BUG)
  #endif
  
  /* For the cris-*-elf subtarget.  */
@@ -1622,7 +1656,8 @@ call_ ## FUNC (void)                                              \
   cris_print_operand (FILE, X, CODE)
  
  /* For delay-slot handling.  */
-#define PRINT_OPERAND_PUNCT_VALID_P(CODE) (CODE == '#')
+#define PRINT_OPERAND_PUNCT_VALID_P(CODE)      \
+ ((CODE) == '#' || (CODE) == '!')
  
  #define PRINT_OPERAND_ADDRESS(FILE, ADDR)      \
     cris_print_operand_address (FILE, ADDR)
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md

index b98115db70ad04121cfbbab4ffbdcfee472792fc..4a9b6c8b13c9b494373ebde0f128b320490d603b 100644 (file)
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -2468,8 +2468,11 @@
          (zero_extend:SI (match_operand:HI 1 "register_operand" "0"))
          (zero_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "mulu.w %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!mulu.w %2,%0"
+  [(set (attr "slottable")
+       (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+                     (const_string "no")
+                     (const_string "yes")))
     ;; Just N unusable here, but let's be safe.
     (set_attr "cc" "clobber")])
  
@@ -2479,8 +2482,11 @@
          (zero_extend:HI (match_operand:QI 1 "register_operand" "0"))
          (zero_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "mulu.b %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!mulu.b %2,%0"
+  [(set (attr "slottable")
+       (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+                     (const_string "no")
+                     (const_string "yes")))
     ;; Not exactly sure, but let's be safe.
     (set_attr "cc" "clobber")])
  
@@ -2495,8 +2501,11 @@
         (mult:SI (match_operand:SI 1 "register_operand" "0")
                  (match_operand:SI 2 "register_operand" "r")))]
    "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.d %2,%0"
+  [(set (attr "slottable")
+       (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+                     (const_string "no")
+                     (const_string "yes")))
     ;; Just N unusable here, but let's be safe.
     (set_attr "cc" "clobber")])
  \f
@@ -2510,8 +2519,11 @@
          (sign_extend:HI (match_operand:QI 1 "register_operand" "0"))
          (sign_extend:HI (match_operand:QI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "muls.b %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.b %2,%0"
+  [(set (attr "slottable")
+       (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+                     (const_string "no")
+                     (const_string "yes")))
     (set_attr "cc" "clobber")])
  
  (define_insn "mulhisi3"
@@ -2520,8 +2532,11 @@
          (sign_extend:SI (match_operand:HI 1 "register_operand" "0"))
          (sign_extend:SI (match_operand:HI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "muls.w %2,%0"
-  [(set_attr "slottable" "yes")
+  "%!muls.w %2,%0"
+  [(set (attr "slottable")
+       (if_then_else (ne (symbol_ref "TARGET_MUL_BUG") (const_int 0))
+                     (const_string "no")
+                     (const_string "yes")))
     ;; Just N unusable here, but let's be safe.
     (set_attr "cc" "clobber")])
  
@@ -2537,7 +2552,7 @@
          (sign_extend:DI (match_operand:SI 1 "register_operand" "0"))
          (sign_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%M0\;move $mof,%H0")
+  "%!muls.d %2,%M0\;move $mof,%H0")
  
  (define_insn "umulsidi3"
    [(set (match_operand:DI 0 "register_operand" "=r")
@@ -2545,7 +2560,7 @@
          (zero_extend:DI (match_operand:SI 1 "register_operand" "0"))
          (zero_extend:DI (match_operand:SI 2 "register_operand" "r"))))]
    "TARGET_HAS_MUL_INSNS"
-  "mulu.d %2,%M0\;move $mof,%H0")
+  "%!mulu.d %2,%M0\;move $mof,%H0")
  
  ;; This pattern would probably not be needed if we add "mof" in its own
  ;; register class (and open a can of worms about /not/ pairing it with a
@@ -2564,7 +2579,7 @@
           (const_int 32))))
     (clobber (match_scratch:SI 3 "=X,1,1"))]
    "TARGET_HAS_MUL_INSNS"
-  "muls.d %2,%1\;move $mof,%0"
+  "%!muls.d %2,%1\;move $mof,%0"
    [(set_attr "cc" "clobber")])
  
  (define_insn "umulsi3_highpart"
@@ -2577,7 +2592,7 @@
           (const_int 32))))
     (clobber (match_scratch:SI 3 "=X,1,1"))]
    "TARGET_HAS_MUL_INSNS"
-  "mulu.d %2,%1\;move $mof,%0"
+  "%!mulu.d %2,%1\;move $mof,%0"
    [(set_attr "cc" "clobber")])
  \f
  ;; Divide and modulus instructions.  CRIS only has a step instruction.
diff --git a/gcc/config/cris/mulsi3.asm b/gcc/config/cris/mulsi3.asm

index 3c482e7f2a96326ba74d02efc78c8481be5297b3..69d9dedd9665c4aef56797c42a0d53df691ea7aa 100644 (file)
--- a/gcc/config/cris/mulsi3.asm
+++ b/gcc/config/cris/mulsi3.asm
@@ -82,8 +82,13 @@
         .type   ___Mul,@function
  ___Mul:
  #if defined (__CRIS_arch_version) && __CRIS_arch_version >= 10
-       ret
+;; Can't have the mulu.d last on a cache-line (in the delay-slot of the
+;; "ret"), due to hardware bug.  See documentation for -mmul-bug-workaround.
+;; Not worthwhile to conditionalize here.
+       .p2alignw 2,0x050f
         mulu.d $r11,$r10
+       ret
+       nop
  #else
         move.d $r10,$r12
         move.d $r11,$r9
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 00ba3a3fc8b2f8ac40206126e64f7eafa70999cb..a12a2a65773b95fbc4f5a856c41b8c636b9b05ba 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -603,7 +603,8 @@ in the following sections.
  -metrax4  -metrax100  -mpdebug  -mcc-init  -mno-side-effects @gol
  -mstack-align  -mdata-align  -mconst-align @gol
  -m32-bit  -m16-bit  -m8-bit  -mno-prologue-epilogue  -mno-gotplt @gol
--melf  -maout  -melinux  -mlinux  -sim  -sim2}
+-melf  -maout  -melinux  -mlinux  -sim  -sim2 @gol
+-mmul-bug-workaround  -mno-mul-bug-workaround}
  
  @emph{PDP-11 Options}
  @gccoptlist{-mfpu  -msoft-float  -mac0  -mno-ac0  -m40  -m45  -m10 @gol
@@ -9826,6 +9827,13 @@ program should be set to @var{n} bytes.
  The options @option{-metrax4} and @option{-metrax100} are synonyms for
  @option{-march=v3} and @option{-march=v8} respectively.
  
+@item -mmul-bug-workaround
+@itemx -mno-mul-bug-workaround
+@opindex mmul-bug-workaround
+@opindex mno-mul-bug-workaround
+Work around a bug in the @code{muls} and @code{mulu} instructions for CPU
+models where it applies.  The workaround is active by default.
+
  @item -mpdebug
  @opindex mpdebug
  Enable CRIS-specific verbose debug-related information in the assembly
author	Hans-Peter Nilsson <hp@axis.com>
	Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)
committer	Hans-Peter Nilsson <hp@gcc.gnu.org>
	Mon, 8 Mar 2004 00:56:30 +0000 (00:56 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/cris/cris.c		patch \| blob \| blame \| history
gcc/config/cris/cris.h		patch \| blob \| blame \| history
gcc/config/cris/cris.md		patch \| blob \| blame \| history
gcc/config/cris/mulsi3.asm		patch \| blob \| blame \| history
gcc/doc/invoke.texi		patch \| blob \| blame \| history