[PATCH, PR target/94954] Fix wrong codegen for vec_pack_to_short_fp32() builtin

author Will Schmidt <will_schmidt@vnet.ibm.com>

Wed, 24 Jun 2020 20:28:24 +0000 (15:28 -0500)

committer Will Schmidt <will_schmidt@vnet.ibm.com>

Wed, 24 Jun 2020 21:08:46 +0000 (16:08 -0500)
author Will Schmidt <will_schmidt@vnet.ibm.com>
Wed, 24 Jun 2020 20:28:24 +0000 (15:28 -0500)
committer Will Schmidt <will_schmidt@vnet.ibm.com>
Wed, 24 Jun 2020 21:08:46 +0000 (16:08 -0500)
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h

index 273b2f2848eed9cd3136d0e6d27a44b49041e74b..bb1524f4a6793a192d36623387a5a950fad49382 100644 (file)
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -433,7 +433,7 @@
  #define vec_first_match_or_eos_index __builtin_vec_first_match_or_eos_index
  #define vec_first_mismatch_index __builtin_vec_first_mismatch_index
  #define vec_first_mismatch_or_eos_index __builtin_vec_first_mismatch_or_eos_index
-#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16
+#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8f16
  #define vec_parity_lsbb __builtin_vec_vparity_lsbb
  #define vec_vctz __builtin_vec_vctz
  #define vec_cnttz __builtin_vec_vctz
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md

index 14c8a286a87b76b730741ec951de468ff8489734..2ce9227c765ad9545369fe723ddd672af245f9bc 100644 (file)
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -80,6 +80,7 @@
     UNSPEC_VUPKHPX
     UNSPEC_VUPKLPX
     UNSPEC_CONVERT_4F32_8I16
+   UNSPEC_CONVERT_4F32_8F16
     UNSPEC_DST
     UNSPEC_DSTT
     UNSPEC_DSTST
@@ -3217,6 +3218,39 @@
    DONE;
  })
  
+
+;; Convert two vector F32 to packed vector F16.
+;; This builtin packs 32-bit floating-point values into packed
+;; 16-bit floating-point values (stored in a 16-bit integer type).
+;; Usage: vector unsigned short r = vec_pack_to_short_fp32 (a, b);
+;; The expected codegen for this builtin is
+;;    xvcvsphp t, a
+;;    xvcvsphp u, b
+;;    if (little endian)
+;;      vpkuwum r, t, u
+;;    else
+;;      vpkuwum r, u, t
+
+(define_expand "convert_4f32_8f16"
+  [(set (match_operand:V8HI 0 "register_operand" "=v")
+       (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v")
+                     (match_operand:V4SF 2 "register_operand" "v")]
+                    UNSPEC_CONVERT_4F32_8F16))]
+  "TARGET_P9_VECTOR"
+{
+  rtx rtx_tmp_hi = gen_reg_rtx (V4SImode);
+  rtx rtx_tmp_lo = gen_reg_rtx (V4SImode);
+
+  emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1]));
+  emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2]));
+  if (!BYTES_BIG_ENDIAN)
+    emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, rtx_tmp_lo));
+  else
+    emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, rtx_tmp_hi));
+  DONE;
+})
+
+
  ;; Generate
  ;;    xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
  ;;    vsubu?m SCRATCH2,SCRATCH1,%1
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def

index 8c5f8c687934306ee8eaae47d0744927b8d5af25..363656ec05cc23ed66d20ef346a85b9ee4c616d0 100644 (file)
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2289,6 +2289,7 @@ BU_P8V_OVERLOAD_3 (VPERMXOR,   "vpermxor")
  BU_P9V_AV_2 (VSLV,             "vslv",                 CONST, vslv)
  BU_P9V_AV_2 (VSRV,             "vsrv",                 CONST, vsrv)
  BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, convert_4f32_8i16)
+BU_P9V_AV_2 (CONVERT_4F32_8F16, "convert_4f32_8f16", CONST, convert_4f32_8f16)
  
  BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi",
              CONST, first_match_index_v16qi)
@@ -2319,6 +2320,7 @@ BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, "first_mismatch_or_eos_index_v4si",
  BU_P9V_OVERLOAD_2 (VSLV,       "vslv")
  BU_P9V_OVERLOAD_2 (VSRV,       "vsrv")
  BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16")
+BU_P9V_OVERLOAD_2 (CONVERT_4F32_8F16, "convert_4f32_8f16")
  
  /* 2 argument vector functions added in ISA 3.0 (power9). */
  BU_P9V_AV_2 (VADUB,            "vadub",                CONST,  vaduv16qi3)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c

index 81816a5be6b482d01c0bc65699406b922409fb56..d8b85347cc80706af117e88dc0ec18adac9c0e0f 100644 (file)
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -1985,6 +1985,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
  
    { P9V_BUILTIN_VEC_CONVERT_4F32_8I16, P9V_BUILTIN_CONVERT_4F32_8I16,
      RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+  { P9V_BUILTIN_VEC_CONVERT_4F32_8F16, P9V_BUILTIN_CONVERT_4F32_8F16,
+    RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
  
    { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI,
      RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md

index e5a10356b4b68e2a7a7598d7098cd2a8f5137e1b..732a54842b61afd289bd00552335aa3a1536422b 100644 (file)
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -299,6 +299,7 @@
     UNSPEC_VSX_XVCVBF16SP
     UNSPEC_VSX_XVCVSPBF16
     UNSPEC_VSX_XVCVSPSXDS
+   UNSPEC_VSX_XVCVSPHP
     UNSPEC_VSX_VSLO
     UNSPEC_VSX_EXTRACT
     UNSPEC_VSX_SXEXPDP
@@ -2187,6 +2188,15 @@
    "xvcvhpsp %x0,%x1"
    [(set_attr "type" "vecfloat")])
  
+;; Generate xvcvsphp
+(define_insn "vsx_xvcvsphp"
+  [(set (match_operand:V4SI 0 "register_operand" "=wa")
+       (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" "wa")]
+                    UNSPEC_VSX_XVCVSPHP))]
+  "TARGET_P9_VECTOR"
+  "xvcvsphp %x0,%x1"
+[(set_attr "type" "vecfloat")])
+
  ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the internal SF
  ;; format of scalars is actually DF.
  (define_insn "vsx_xscvdpsp_scalar"
diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c

index 0e4ab48f5554fc5c9406d393ca2f28657f82615a..0e6eeff4b7e52105e8edb3103c0f48c7ee0b3a23 100644 (file)
--- a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
+++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c
@@ -1,25 +1,50 @@
-/* { dg-do run { target { powerpc*-*-linux* && { lp64 && p9vector_hw } } } } */
-/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-do run { target { powerpc*-*-linux* && p9vector_hw } } } */
+/* { dg-require-effective-target p9vector_hw } */
  /* { dg-options "-O2 -mdejagnu-cpu=power9" } */
  
  #include <altivec.h>
+#include <stdio.h>
  
  void abort (void);
  
  int main() {
    int i;
    vector float vfa, vfb;
-  vector unsigned short vur, vuexpt;
+  vector unsigned short vresult, vexpected;
  
-  vfa = (vector float){3.4, 5.0, 20.0, 50.9 };
-  vfb = (vector float){10.0, 40.0, 70.0, 100.0 };
-  vuexpt = (vector unsigned short){ 3, 5, 20, 50,
-                                    10, 40, 70, 100};
+  vfa = (vector float){0.4, 1.6, 20.0, 99.9 };
+  vfb = (vector float){10.0, -2.0, 70.0, 999.0 };
  
-  vur = vec_pack_to_short_fp32 (vfa, vfb);
+  /* Expected results.  */
+  vexpected = (vector unsigned short) { 0x3666, 0x3e66, 0x4d00, 0x563e,
+                                       0x4900, 0xc000, 0x5460, 0x63ce};
+
+/*
+     vresult = vec_pack_to_short_fp32 (vfa, vfb);
+  This built-in converts a pair of vector floats into a single vector of
+  packed half-precision (F16) values.  The result type is a vector of
+  unsigned shorts.
+  The expected codegen for this builtin is
+    xvcvsphp t, vfa
+    xvcvsphp u, vfb
+    if (little endian)
+      vpkuwum vresult, t, u
+    else
+      vpkuwum vresult, u, t
+*/
+
+  vresult = vec_pack_to_short_fp32 (vfa, vfb);
+
+#ifdef DEBUG
+  for(i = 0; i< 4; i++) { printf("i=[%d] %f  \n",i,vfa[i]); }
+  for(i = 0; i< 4; i++) { printf("i=[%d] %f  \n",i+4,vfb[i]); }
+  for(i = 0; i< 8; i++) { printf("i=[%d] %d  \n",i,vresult[i]); }
+#endif
  
    for(i = 0; i< 8; i++) {
-    if (vur[i] != vuexpt[i])
+    if (vresult[i] != vexpected[i]) {
+       printf("i=[%d] 0x%x != 0x%x \n",i,vresult[i],vexpected[i]);
        abort();
+    }
    }
  }
author	Will Schmidt <will_schmidt@vnet.ibm.com>
	Wed, 24 Jun 2020 20:28:24 +0000 (15:28 -0500)
committer	Will Schmidt <will_schmidt@vnet.ibm.com>
	Wed, 24 Jun 2020 21:08:46 +0000 (16:08 -0500)
gcc/config/rs6000/altivec.h		patch \| blob \| blame \| history
gcc/config/rs6000/altivec.md		patch \| blob \| blame \| history
gcc/config/rs6000/rs6000-builtin.def		patch \| blob \| blame \| history
gcc/config/rs6000/rs6000-call.c		patch \| blob \| blame \| history
gcc/config/rs6000/vsx.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c		patch \| blob \| blame \| history