]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Reimplement packuswb/packusdw with UNSPEC_US_TRUNCATE instead of original us_truncate.
author liuhongt <hongtao.liu@intel.com>
Wed, 14 Jun 2023 02:34:32 +0000 (10:34 +0800)
committer liuhongt <hongtao.liu@intel.com>
Mon, 19 Jun 2023 01:34:16 +0000 (09:34 +0800)
packuswb/packusdw perform unsigned saturation of a signed source, but
the RTL us_truncate code means unsigned saturation of an unsigned source.
So for the value -1, packuswb produces 0, but us_truncate produces
255. This patch reimplements the related patterns and functions with
UNSPEC_US_TRUNCATE instead of us_truncate.

gcc/ChangeLog:

PR target/110235
* config/i386/i386-expand.cc (ix86_split_mmx_pack): Use
UNSPEC_US_TRUNCATE instead of original us_truncate for
packusdw/packuswb.
* config/i386/mmx.md (mmx_pack<s_trunsuffix>swb): Substitute
with ..
(mmx_packsswb): .. this and ..
(mmx_packuswb): .. this.
(mmx_packusdw): Use UNSPEC_US_TRUNCATE instead of original
us_truncate.
(s_trunsuffix): Removed code attribute.
(any_s_truncate): Removed code iterator.
* config/i386/sse.md (<sse2_avx2>_packuswb<mask_name>): Use
UNSPEC_US_TRUNCATE instead of original us_truncate.
(<sse4_1_avx2>_packusdw<mask_name>): Ditto.
* config/i386/i386.md (UNSPEC_US_TRUNCATE): New unspec_c_enum.

gcc/config/i386/i386-expand.cc
gcc/config/i386/i386.md
gcc/config/i386/mmx.md
gcc/config/i386/sse.md

index 98c43c6704ca3c2ddc358446f117a59b1f7df9e3..ad2cd079fcb9ce2888f1820f8c7eaca14e90aa3f 100644 (file)
@@ -1021,6 +1021,7 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   rtx op0 = operands[0];
   rtx op1 = operands[1];
   rtx op2 = operands[2];
+  rtx src;
 
   machine_mode dmode = GET_MODE (op0);
   machine_mode smode = GET_MODE (op1);
@@ -1044,11 +1045,20 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
   op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
 
-  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
-  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
-  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
-                                                   op1, op2));
-  emit_insn (insn);
+  /* packusdw/packuswb do unsigned saturation of a signed source,
+     which is different from the generic us_truncate RTX.  */
+  if (code == US_TRUNCATE)
+    src = gen_rtx_UNSPEC (sse_dmode,
+                         gen_rtvec (2, op1, op2),
+                         UNSPEC_US_TRUNCATE);
+  else
+    {
+      op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+      op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+      src = gen_rtx_VEC_CONCAT (sse_dmode, op1, op2);
+    }
+
+  emit_move_insn (dest, src);
 
   ix86_move_vector_high_sse_to_mmx (op0);
 }
index bfdd54f54ba054c18d18eeae884fda46501542b6..95a6653cd13e20b5527bd3d38186dab1fa50d5bc 100644 (file)
   UNSPEC_RSQRT
   UNSPEC_PSADBW
 
+  ;; Different from generic us_truncate RTX
+  ;; as it does unsigned saturation of signed source.
+  UNSPEC_US_TRUNCATE
+
   ;; For AVX/AVX512F support
   UNSPEC_SCALEF
   UNSPEC_PCMP
index 6fbe3909c8bb556f87311c2bbc5c317d9c68f9a4..12b103a06a7e7148793a4caeade8c4414e869d24 100644 (file)
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; Used in signed and unsigned truncations with saturation.
-(define_code_iterator any_s_truncate [ss_truncate us_truncate])
-;; Instruction suffix for truncations with saturation.
-(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
-
-(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+(define_insn_and_split "mmx_packsswb"
   [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
        (vec_concat:V8QI
-         (any_s_truncate:V4QI
+         (ss_truncate:V4QI
            (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
-         (any_s_truncate:V4QI
+         (ss_truncate:V4QI
            (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "@
-   pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+   packsswb\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed
+   && SSE_REGNO_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
+  [(set_attr "mmx_isa" "native,sse_noavx,avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
+
+;; This instruction does unsigned saturation of signed source
+;; and is different from generic us_truncate RTX.
+(define_insn_and_split "mmx_packuswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+       (unspec:V8QI
+         [(match_operand:V4HI 1 "register_operand" "0,0,Yw")
+          (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")]
+         UNSPEC_US_TRUNCATE))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   packuswb\t{%2, %0|%0, %2}
    #
    #"
   "&& reload_completed
    && SSE_REGNO_P (REGNO (operands[0]))"
   [(const_int 0)]
-  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
+  "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
   [(set_attr "mmx_isa" "native,sse_noavx,avx")
    (set_attr "type" "mmxshft,sselog,sselog")
    (set_attr "mode" "DI,TI,TI")])
 
 (define_insn_and_split "mmx_packusdw"
   [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
-       (vec_concat:V4HI
-         (us_truncate:V2HI
-           (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
-         (us_truncate:V2HI
-           (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw"))))]
+       (unspec:V4HI
+         [(match_operand:V2SI 1 "register_operand" "0,0,Yw")
+          (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw")]
+          UNSPEC_US_TRUNCATE))]
   "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
   "#"
   "&& reload_completed"
index 70d7410f7a916678c4edb8e084028db7c6cfb5d7..35b46a54130afa5d7849f33d7664c66f89cdf8cf 100644 (file)
    (set_attr "prefix" "orig,<mask_prefix>")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; This instruction does unsigned saturation of signed source
+;; and is different from generic us_truncate RTX.
 (define_insn "<sse2_avx2>_packuswb<mask_name>"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
-       (vec_concat:VI1_AVX512
-         (us_truncate:<ssehalfvecmode>
-           (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
-         (us_truncate:<ssehalfvecmode>
-           (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
+       (unspec:VI1_AVX512
+         [(match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>")
+          (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m")]
+         UNSPEC_US_TRUNCATE))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packuswb\t{%2, %0|%0, %2}
 
 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
   [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
-       (vec_concat:VI2_AVX2
-         (us_truncate:<ssehalfvecmode>
-           (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
-         (us_truncate:<ssehalfvecmode>
-           (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
+       (unspec:VI2_AVX2
+         [(match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>")
+          (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")]
+          UNSPEC_US_TRUNCATE))]
   "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packusdw\t{%2, %0|%0, %2}