From: Andrew Stubbs <ams@codesourcery.com>
Date: Fri, 20 Oct 2023 15:26:51 +0000 (+0100)
Subject: vect: Don't set excess bits in uniform masks
X-Git-Tag: basepoints/gcc-15~4820
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a5922427c29fad177251d89cc946d1c5bfc135eb;p=thirdparty%2Fgcc.git

vect: Don't set excess bits in uniform masks

AVX ignores any excess bits in the mask (at least for vector sizes >=8), but
AMD GCN magically uses a larger vector than was intended (the smaller sizes are
"fake"), leading to wrong-code.

This patch fixes amdgcn execution failures in gcc.dg/vect/pr81740-1.c,
gfortran.dg/c-interop/contiguous-1.f90,
gfortran.dg/c-interop/ff-descriptor-7.f90, and others.

gcc/ChangeLog:

	* expr.cc (store_constructor): Add "and" operation to uniform mask
	generation.
---

diff --git a/gcc/expr.cc b/gcc/expr.cc
index ed4dbb13d893..3e2a678710d6 100644
--- a/gcc/expr.cc
+++ b/gcc/expr.cc
@@ -7470,7 +7470,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 	    break;
 	  }
 	/* Use sign-extension for uniform boolean vectors with
-	   integer modes.  */
+	   integer modes.  Effectively "vec_duplicate" for bitmasks.  */
 	if (!TREE_SIDE_EFFECTS (exp)
 	    && VECTOR_BOOLEAN_TYPE_P (type)
 	    && SCALAR_INT_MODE_P (mode)
@@ -7479,7 +7479,19 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
 	  {
 	    rtx op0 = force_reg (TYPE_MODE (TREE_TYPE (elt)),
 				 expand_normal (elt));
-	    convert_move (target, op0, 0);
+	    rtx tmp = gen_reg_rtx (mode);
+	    convert_move (tmp, op0, 0);
+
+	    /* Ensure no excess bits are set.
+	       GCN needs this for nunits < 64.
+	       x86 needs this for nunits < 8.  */
+	    auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+	    if (maybe_ne (GET_MODE_PRECISION (mode), nunits))
+	      tmp = expand_binop (mode, and_optab, tmp,
+				  GEN_INT ((1 << nunits) - 1), target,
+				  true, OPTAB_DIRECT);
+	    if (tmp != target)
+	      emit_move_insn (target, tmp);
 	    break;
 	  }