From: Andrew Stubbs Date: Fri, 20 Oct 2023 15:26:51 +0000 (+0100) Subject: vect: Don't set excess bits in uniform masks X-Git-Tag: basepoints/gcc-15~4820 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=a5922427c29fad177251d89cc946d1c5bfc135eb;p=thirdparty%2Fgcc.git vect: Don't set excess bits in uniform masks AVX ignores any excess bits in the mask (at least for vector sizes >=8), but AMD GCN magically uses a larger vector than was intended (the smaller sizes are "fake"), leading to wrong-code. This patch fixes amdgcn execution failures in gcc.dg/vect/pr81740-1.c, gfortran.dg/c-interop/contiguous-1.f90, gfortran.dg/c-interop/ff-descriptor-7.f90, and others. gcc/ChangeLog: * expr.cc (store_constructor): Add "and" operation to uniform mask generation. --- diff --git a/gcc/expr.cc b/gcc/expr.cc index ed4dbb13d893..3e2a678710d6 100644 --- a/gcc/expr.cc +++ b/gcc/expr.cc @@ -7470,7 +7470,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, break; } /* Use sign-extension for uniform boolean vectors with - integer modes. */ + integer modes. Effectively "vec_duplicate" for bitmasks. */ if (!TREE_SIDE_EFFECTS (exp) && VECTOR_BOOLEAN_TYPE_P (type) && SCALAR_INT_MODE_P (mode) @@ -7479,7 +7479,19 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size, { rtx op0 = force_reg (TYPE_MODE (TREE_TYPE (elt)), expand_normal (elt)); - convert_move (target, op0, 0); + rtx tmp = gen_reg_rtx (mode); + convert_move (tmp, op0, 0); + + /* Ensure no excess bits are set. + GCN needs this for nunits < 64. + x86 needs this for nunits < 8. */ + auto nunits = TYPE_VECTOR_SUBPARTS (type).to_constant (); + if (maybe_ne (GET_MODE_PRECISION (mode), nunits)) + tmp = expand_binop (mode, and_optab, tmp, + GEN_INT ((1 << nunits) - 1), target, + true, OPTAB_DIRECT); + if (tmp != target) + emit_move_insn (target, tmp); break; }