tree-optimization/115868 - ICE with .MASK_CALL in simdclone
author    Richard Biener <rguenther@suse.de>  Thu, 11 Jul 2024 07:56:56 +0000 (09:56 +0200)
committer Richard Biener <rguenth@gcc.gnu.org>  Sat, 13 Jul 2024 11:29:49 +0000 (13:29 +0200)
The following adjusts mask recording, which didn't take into account
that we can merge call arguments from two vectors, as in

  _50 = {vect_d_1.253_41, vect_d_1.254_43};
  _51 = VIEW_CONVERT_EXPR<unsigned char>(mask__19.257_49);
  _52 = (unsigned int) _51;
  _53 = _Z3bazd.simdclone.7 (_50, _52);
  _54 = BIT_FIELD_REF <_53, 256, 0>;
  _55 = BIT_FIELD_REF <_53, 256, 256>;

The testcase g++.dg/vect/pr68762-2.cc exercises this on x86_64 with
partial vector usage enabled and AVX512 support.

PR tree-optimization/115868
* tree-vect-stmts.cc (vectorizable_simd_clone_call): Correctly
compute the number of mask copies required for vect_record_loop_mask.

diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc
index 2e4d500d1f26add3349f4bbf05951896c7dc20c6..8530a98e6d692892de20dae174739b797cb923ed 100644
--- a/gcc/tree-vect-stmts.cc
+++ b/gcc/tree-vect-stmts.cc
@@ -4349,9 +4349,14 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info,
            case SIMD_CLONE_ARG_TYPE_MASK:
              if (loop_vinfo
                  && LOOP_VINFO_CAN_USE_PARTIAL_VECTORS_P (loop_vinfo))
-               vect_record_loop_mask (loop_vinfo,
-                                      &LOOP_VINFO_MASKS (loop_vinfo),
-                                      ncopies, vectype, op);
+               {
+                 unsigned nmasks
+                   = exact_div (ncopies * bestn->simdclone->simdlen,
+                                TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
+                 vect_record_loop_mask (loop_vinfo,
+                                        &LOOP_VINFO_MASKS (loop_vinfo),
+                                        nmasks, vectype, op);
+               }
 
              break;
            }
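
For illustration, a minimal standalone sketch of the arithmetic the fix
performs, using made-up lane counts rather than values taken from the
vectorizer; GCC's exact_div asserts divisibility, which is modelled here
with a plain assert:

  #include <cassert>

  // Hypothetical stand-in for the nmasks computation in the patch:
  // the number of loop masks to record is the total number of lanes
  // the clone calls cover, divided by the lanes per mask vector.
  static unsigned
  nmasks_needed (unsigned ncopies, unsigned simdlen, unsigned mask_subparts)
  {
    unsigned lanes = ncopies * simdlen;
    assert (lanes % mask_subparts == 0); // exact_div requires exactness
    return lanes / mask_subparts;
  }

  int
  main ()
  {
    // When the clone's simdlen equals the mask vector width, the new
    // computation degenerates to ncopies, matching the old behaviour.
    assert (nmasks_needed (2, 8, 8) == 2);
    // With call arguments merged from two input vectors (as in the
    // quoted GIMPLE), simdlen no longer matches the mask vector width,
    // so passing ncopies directly would record the wrong mask count.
    assert (nmasks_needed (2, 8, 16) == 1);
    return 0;
  }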