From 5b2d679bbbcc2b976c6e228ba63afdf67c33164e Mon Sep 17 00:00:00 2001
From: Tom de Vries
Date: Mon, 7 Feb 2022 14:12:34 +0100
Subject: [PATCH] [nvptx] Workaround sub.u16 driver JIT bug

There's a nvidia driver JIT bug that mishandles this code (minimized from
builtin-arith-overflow-15.c):
...
int
main (void)
{
  signed char r;
  unsigned char y = (unsigned char) 0x80;
  if (__builtin_sub_overflow ((unsigned char)0, (unsigned char)y, &r))
    __builtin_abort ();
  return 0;
}
...
which at ptx level minimizes to:
...
  mov.u16 r22, 0x0080;
  st.local.u16 [frame_var],r22;
  ld.local.u16 r32,[frame_var];
  sub.u16 r33,0x0000,r32;
  cvt.u32.u16 r35,r33;
...
where we expect r35 == 0x0000ff80 but get instead 0xffffff80, and where using
nvptx-none-run -O0 fixes the problem.
[ See also
https://github.com/vries/nvidia-bugs/tree/master/builtin-arith-overflow-15 . ]

Try to workaround the bug by using sub.s16 instead of sub.u16.

Tested on nvptx.

gcc/ChangeLog:

2022-02-07  Tom de Vries

	PR target/97005
	* config/nvptx/nvptx.md (define_insn "sub<mode>3"): Workaround
	driver JIT bug by using sub.s16 instead of sub.u16.
---
 gcc/config/nvptx/nvptx.md | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index bb0c0b3b9a5e..cced68e0d4a6 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -506,7 +506,14 @@
 	(minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
 		     (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
   ""
-  "%.\\tsub%t0\\t%0, %1, %2;")
+  {
+    if (GET_MODE (operands[0]) == HImode)
+      /* Workaround https://developer.nvidia.com/nvidia_bug/3527713.
+	 See PR97005.  */
+      return "%.\\tsub.s16\\t%0, %1, %2;";
+
+    return "%.\\tsub%t0\\t%0, %1, %2;";
+  })
 
 (define_insn "mul<mode>3"
   [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
-- 
2.47.2