]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Add -mmove-max=bits and -mstore-max=bits
authorH.J. Lu <hjl.tools@gmail.com>
Tue, 16 Nov 2021 02:52:56 +0000 (18:52 -0800)
committerH.J. Lu <hjl.tools@gmail.com>
Fri, 3 Dec 2021 17:57:06 +0000 (09:57 -0800)
Add -mmove-max=bits and -mstore-max=bits to enable 256-bit/512-bit move
and store, independent of -mprefer-vector-width=bits:

1. Add X86_TUNE_AVX512_MOVE_BY_PIECES and X86_TUNE_AVX512_STORE_BY_PIECES
which are enabled for Intel Sapphire Rapids processor.
2. Add -mmove-max=bits to set the maximum number of bits can be moved from
memory to memory efficiently.  The default value is derived from
X86_TUNE_AVX512_MOVE_BY_PIECES, X86_TUNE_AVX256_MOVE_BY_PIECES, and the
preferred vector width.
3. Add -mstore-max=bits to set the maximum number of bits can be stored to
memory efficiently.  The default value is derived from
X86_TUNE_AVX512_STORE_BY_PIECES, X86_TUNE_AVX256_STORE_BY_PIECES and the
preferred vector width.

gcc/

PR target/103269
* config/i386/i386-expand.c (ix86_expand_builtin): Pass PVW_NONE
and PVW_NONE to ix86_target_string.
* config/i386/i386-options.c (ix86_target_string): Add arguments
for move_max and store_max.
(ix86_target_string::add_vector_width): New lambda.
(ix86_debug_options): Pass ix86_move_max and ix86_store_max to
ix86_target_string.
(ix86_function_specific_print): Pass ptr->x_ix86_move_max and
ptr->x_ix86_store_max to ix86_target_string.
(ix86_valid_target_attribute_tree): Handle x_ix86_move_max and
x_ix86_store_max.
(ix86_option_override_internal): Set the default x_ix86_move_max
and x_ix86_store_max.
* config/i386/i386-options.h (ix86_target_string): Add
prefer_vector_width and prefer_vector_width.
* config/i386/i386.h (TARGET_AVX256_MOVE_BY_PIECES): Removed.
(TARGET_AVX256_STORE_BY_PIECES): Likewise.
(MOVE_MAX): Use 64 if ix86_move_max or ix86_store_max ==
PVW_AVX512.  Use 32 if ix86_move_max or ix86_store_max >=
PVW_AVX256.
(STORE_MAX_PIECES): Use 64 if ix86_store_max == PVW_AVX512.
Use 32 if ix86_store_max >= PVW_AVX256.
* config/i386/i386.opt: Add -mmove-max=bits and -mstore-max=bits.
* config/i386/x86-tune.def (X86_TUNE_AVX512_MOVE_BY_PIECES): New.
(X86_TUNE_AVX512_STORE_BY_PIECES): Likewise.
* doc/invoke.texi: Document -mmove-max=bits and -mstore-max=bits.

gcc/testsuite/

PR target/103269
* gcc.target/i386/pieces-memcpy-17.c: New test.
* gcc.target/i386/pieces-memcpy-18.c: Likewise.
* gcc.target/i386/pieces-memcpy-19.c: Likewise.
* gcc.target/i386/pieces-memcpy-20.c: Likewise.
* gcc.target/i386/pieces-memcpy-21.c: Likewise.
* gcc.target/i386/pieces-memset-45.c: Likewise.
* gcc.target/i386/pieces-memset-46.c: Likewise.
* gcc.target/i386/pieces-memset-47.c: Likewise.
* gcc.target/i386/pieces-memset-48.c: Likewise.
* gcc.target/i386/pieces-memset-49.c: Likewise.

17 files changed:
gcc/config/i386/i386-expand.c
gcc/config/i386/i386-options.c
gcc/config/i386/i386-options.h
gcc/config/i386/i386.h
gcc/config/i386/i386.opt
gcc/config/i386/x86-tune.def
gcc/doc/invoke.texi
gcc/testsuite/gcc.target/i386/pieces-memcpy-17.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memcpy-19.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memcpy-20.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memset-45.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memset-46.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memset-47.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memset-48.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pieces-memset-49.c [new file with mode: 0644]

index 068c5c23358a8a256bbf80ffcfccf04538c3d475..2bbb28e5317644e798465130ff467d6ec2d42368 100644 (file)
@@ -12305,6 +12305,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
       char *opts = ix86_target_string (bisa, bisa2, 0, 0, NULL, NULL,
                                       (enum fpmath_unit) 0,
                                       (enum prefer_vector_width) 0,
+                                      PVW_NONE, PVW_NONE,
                                       false, add_abi_p);
       if (!opts)
        error ("%qE needs unknown isa option", fndecl);
index f971e03abad5589b38b79637ca923baa3ff106b1..53bd55a12e3595a87ca604b14d3b39edac30f4f9 100644 (file)
@@ -364,6 +364,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
                    const char *arch, const char *tune,
                    enum fpmath_unit fpmath,
                    enum prefer_vector_width pvw,
+                   enum prefer_vector_width move_max,
+                   enum prefer_vector_width store_max,
                    bool add_nl_p, bool add_abi_p)
 {
   /* Flag options.  */
@@ -542,10 +544,10 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
        }
     }
 
-  /* Add -mprefer-vector-width= option.  */
-  if (pvw)
+  auto add_vector_width = [&opts, &num] (prefer_vector_width pvw,
+                                        const char *cmd)
     {
-      opts[num][0] = "-mprefer-vector-width=";
+      opts[num][0] = cmd;
       switch ((int) pvw)
        {
        case PVW_AVX128:
@@ -563,7 +565,19 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
        default:
          gcc_unreachable ();
        }
-    }
+    };
+
+  /* Add -mprefer-vector-width= option.  */
+  if (pvw)
+    add_vector_width (pvw, "-mprefer-vector-width=");
+
+  /* Add -mmove-max= option.  */
+  if (move_max)
+    add_vector_width (move_max, "-mmove-max=");
+
+  /* Add -mstore-max= option.  */
+  if (store_max)
+    add_vector_width (store_max, "-mstore-max=");
 
   /* Any options?  */
   if (num == 0)
@@ -630,6 +644,7 @@ ix86_debug_options (void)
                                   target_flags, ix86_target_flags,
                                   ix86_arch_string, ix86_tune_string,
                                   ix86_fpmath, prefer_vector_width_type,
+                                  ix86_move_max, ix86_store_max,
                                   true, true);
 
   if (opts)
@@ -892,7 +907,9 @@ ix86_function_specific_print (FILE *file, int indent,
     = ix86_target_string (ptr->x_ix86_isa_flags, ptr->x_ix86_isa_flags2,
                          ptr->x_target_flags, ptr->x_ix86_target_flags,
                          NULL, NULL, ptr->x_ix86_fpmath,
-                         ptr->x_prefer_vector_width_type, false, true);
+                         ptr->x_prefer_vector_width_type,
+                         ptr->x_ix86_move_max, ptr->x_ix86_store_max,
+                         false, true);
 
   gcc_assert (ptr->arch < PROCESSOR_max);
   fprintf (file, "%*sarch = %d (%s)\n",
@@ -1318,6 +1335,10 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
   const char *orig_tune_string = opts->x_ix86_tune_string;
   enum fpmath_unit orig_fpmath_set = opts_set->x_ix86_fpmath;
   enum prefer_vector_width orig_pvw_set = opts_set->x_prefer_vector_width_type;
+  enum prefer_vector_width orig_ix86_move_max_set
+    = opts_set->x_ix86_move_max;
+  enum prefer_vector_width orig_ix86_store_max_set
+    = opts_set->x_ix86_store_max;
   int orig_tune_defaulted = ix86_tune_defaulted;
   int orig_arch_specified = ix86_arch_specified;
   char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL };
@@ -1393,6 +1414,8 @@ ix86_valid_target_attribute_tree (tree fndecl, tree args,
       opts->x_ix86_tune_string = orig_tune_string;
       opts_set->x_ix86_fpmath = orig_fpmath_set;
       opts_set->x_prefer_vector_width_type = orig_pvw_set;
+      opts_set->x_ix86_move_max = orig_ix86_move_max_set;
+      opts_set->x_ix86_store_max = orig_ix86_store_max_set;
       opts->x_ix86_excess_precision = orig_ix86_excess_precision;
       opts->x_ix86_unsafe_math_optimizations
        = orig_ix86_unsafe_math_optimizations;
@@ -2691,6 +2714,48 @@ ix86_option_override_internal (bool main_args_p,
       && (opts_set->x_prefer_vector_width_type == PVW_NONE))
     opts->x_prefer_vector_width_type = PVW_AVX256;
 
+  if (opts_set->x_ix86_move_max == PVW_NONE)
+    {
+      /* Set the maximum number of bits can be moved from memory to
+        memory efficiently.  */
+      if (ix86_tune_features[X86_TUNE_AVX512_MOVE_BY_PIECES])
+       opts->x_ix86_move_max = PVW_AVX512;
+      else if (ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES])
+       opts->x_ix86_move_max = PVW_AVX256;
+      else
+       {
+         opts->x_ix86_move_max = opts->x_prefer_vector_width_type;
+         if (opts_set->x_ix86_move_max == PVW_NONE)
+           {
+             if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_move_max = PVW_AVX512;
+             else
+               opts->x_ix86_move_max = PVW_AVX128;
+           }
+       }
+    }
+
+  if (opts_set->x_ix86_store_max == PVW_NONE)
+    {
+      /* Set the maximum number of bits can be stored to memory
+        efficiently.  */
+      if (ix86_tune_features[X86_TUNE_AVX512_STORE_BY_PIECES])
+       opts->x_ix86_store_max = PVW_AVX512;
+      else if (ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES])
+       opts->x_ix86_store_max = PVW_AVX256;
+      else
+       {
+         opts->x_ix86_store_max = opts->x_prefer_vector_width_type;
+         if (opts_set->x_ix86_store_max == PVW_NONE)
+           {
+             if (TARGET_AVX512F_P (opts->x_ix86_isa_flags))
+               opts->x_ix86_store_max = PVW_AVX512;
+             else
+               opts->x_ix86_store_max = PVW_AVX128;
+           }
+       }
+    }
+
   if (opts->x_ix86_recip_name)
     {
       char *p = ASTRDUP (opts->x_ix86_recip_name);
index cdaca2644f413d811f44ee7a5b1585ce135637a4..e218e24d15bbf4893219d5d28397237498a4b9b9 100644 (file)
@@ -26,8 +26,10 @@ char *ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_INT isa2,
                          int flags, int flags2,
                          const char *arch, const char *tune,
                          enum fpmath_unit fpmath,
-                         enum prefer_vector_width pvw, bool add_nl_p,
-                         bool add_abi_p);
+                         enum prefer_vector_width pvw,
+                         enum prefer_vector_width move_max,
+                         enum prefer_vector_width store_max,
+                         bool add_nl_p, bool add_abi_p);
 
 extern enum attr_cpu ix86_schedule;
 
index 2fda1e0686e175f89cde34d1f1d2159e96704007..4f70085d79348e1cc68c168f351ebbf04ab2b9e5 100644 (file)
@@ -408,10 +408,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
        ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
 #define TARGET_SOFTWARE_PREFETCHING_BENEFICIAL \
        ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
-#define TARGET_AVX256_MOVE_BY_PIECES \
-       ix86_tune_features[X86_TUNE_AVX256_MOVE_BY_PIECES]
-#define TARGET_AVX256_STORE_BY_PIECES \
-       ix86_tune_features[X86_TUNE_AVX256_STORE_BY_PIECES]
 #define TARGET_AVX256_SPLIT_REGS \
        ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
 #define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -1807,12 +1803,13 @@ typedef struct ix86_args {
    MOVE_MAX_PIECES defaults to MOVE_MAX.  */
 
 #define MOVE_MAX \
-  ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+  ((TARGET_AVX512F \
+    && (ix86_move_max == PVW_AVX512 \
+       || ix86_store_max == PVW_AVX512)) \
    ? 64 \
    : ((TARGET_AVX \
-       && !TARGET_PREFER_AVX128 \
-       && (TARGET_AVX256_MOVE_BY_PIECES \
-          || TARGET_AVX256_STORE_BY_PIECES)) \
+       && (ix86_move_max >= PVW_AVX256 \
+          || ix86_store_max >= PVW_AVX256)) \
       ? 32 \
       : ((TARGET_SSE2 \
          && TARGET_SSE_UNALIGNED_LOAD_OPTIMAL \
@@ -1825,11 +1822,10 @@ typedef struct ix86_args {
    store_by_pieces of 16/32/64 bytes.  */
 #define STORE_MAX_PIECES \
   (TARGET_INTER_UNIT_MOVES_TO_VEC \
-   ? ((TARGET_AVX512F && !TARGET_PREFER_AVX256) \
+   ? ((TARGET_AVX512F && ix86_store_max == PVW_AVX512) \
       ? 64 \
       : ((TARGET_AVX \
-         && !TARGET_PREFER_AVX128 \
-         && TARGET_AVX256_STORE_BY_PIECES) \
+         && ix86_store_max >= PVW_AVX256) \
          ? 32 \
          : ((TARGET_SSE2 \
              && TARGET_SSE_UNALIGNED_STORE_OPTIMAL) \
index 3e67c537bb7b67fcb60bf6085c46e5e0ec736167..e1af3e417b0ac092889dda49f314392cb95627be 100644 (file)
@@ -624,6 +624,14 @@ Enum(prefer_vector_width) String(256) Value(PVW_AVX256)
 EnumValue
 Enum(prefer_vector_width) String(512) Value(PVW_AVX512)
 
+mmove-max=
+Target RejectNegative Joined Var(ix86_move_max) Enum(prefer_vector_width) Init(PVW_NONE) Save
+Maximum number of bits that can be moved from memory to memory efficiently.
+
+mstore-max=
+Target RejectNegative Joined Var(ix86_store_max) Enum(prefer_vector_width) Init(PVW_NONE) Save
+Maximum number of bits that can be stored to memory efficiently.
+
 ;; ISA support
 
 m32
index dab4b3d3c0bc6f0f9ec04cae2d1eed16fd38ed69..20cb662737848ccde54f0511a31decf9acbc4c6a 100644 (file)
@@ -512,6 +512,16 @@ DEF_TUNE (X86_TUNE_AVX256_MOVE_BY_PIECES, "avx256_move_by_pieces",
 DEF_TUNE (X86_TUNE_AVX256_STORE_BY_PIECES, "avx256_store_by_pieces",
          m_CORE_AVX512)
 
+/* X86_TUNE_AVX512_MOVE_BY_PIECES: Optimize move_by_pieces with 512-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX512_MOVE_BY_PIECES, "avx512_move_by_pieces",
+         m_SAPPHIRERAPIDS)
+
+/* X86_TUNE_AVX512_STORE_BY_PIECES: Optimize store_by_pieces with 512-bit
+   AVX instructions.  */
+DEF_TUNE (X86_TUNE_AVX512_STORE_BY_PIECES, "avx512_store_by_pieces",
+         m_SAPPHIRERAPIDS)
+
 /*****************************************************************************/
 /*****************************************************************************/
 /* Historical relics: tuning flags that helps a specific old CPU designs     */
index 6111a6b67944298984c4cffb197ce362c625fe16..510ed079b99374028e38d20f3edbac12d75f7842 100644 (file)
@@ -1394,6 +1394,7 @@ See RS/6000 and PowerPC Options.
 -mcld  -mcx16  -msahf  -mmovbe  -mcrc32 -mmwait @gol
 -mrecip  -mrecip=@var{opt} @gol
 -mvzeroupper  -mprefer-avx128  -mprefer-vector-width=@var{opt} @gol
+-mmove-max=@var{bits} -mstore-max=@var{bits} @gol
 -mmmx  -msse  -msse2  -msse3  -mssse3  -msse4.1  -msse4.2  -msse4  -mavx @gol
 -mavx2  -mavx512f  -mavx512pf  -mavx512er  -mavx512cd  -mavx512vl @gol
 -mavx512bw  -mavx512dq  -mavx512ifma  -mavx512vbmi  -msha  -maes @gol
@@ -31872,6 +31873,18 @@ This option instructs GCC to use 128-bit AVX instructions instead of
 This option instructs GCC to use @var{opt}-bit vector width in instructions
 instead of default on the selected platform.
 
+@item -mmove-max=@var{bits}
+@opindex mmove-max
+This option instructs GCC to set the maximum number of bits can be
+moved from memory to memory efficiently to @var{bits}.  The valid
+@var{bits} are 128, 256 and 512.
+
+@item -mstore-max=@var{bits}
+@opindex mstore-max
+This option instructs GCC to set the maximum number of bits can be
+stored to memory efficiently to @var{bits}.  The valid @var{bits} are
+128, 256 and 512.
+
 @table @samp
 @item none
 No extra limitations applied to GCC other than defined by the selected platform.
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-17.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-17.c
new file mode 100644 (file)
index 0000000..28ab7a6
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mprefer-vector-width=256 -mavx512f -mmove-max=512" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-18.c
new file mode 100644 (file)
index 0000000..b15a0db
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-19.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-19.c
new file mode 100644 (file)
index 0000000..a5b5b61
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids -mmove-max=128 -mstore-max=128" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 8 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-20.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-20.c
new file mode 100644 (file)
index 0000000..1feff48
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids -mmove-max=256 -mstore-max=256" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu(?:64|)\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c b/gcc/testsuite/gcc.target/i386/pieces-memcpy-21.c
new file mode 100644 (file)
index 0000000..ef439f2
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids -march=x86-64 -mavx2" } */
+
+extern char *dst, *src;
+
+void
+foo (void)
+{
+  __builtin_memcpy (dst, src, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu(?:64|)\[ \\t\]+\[^\n\]*%ymm" 4 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-45.c b/gcc/testsuite/gcc.target/i386/pieces-memset-45.c
new file mode 100644 (file)
index 0000000..70c80e5
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64 -mprefer-vector-width=256 -mavx512f -mtune-ctrl=avx512_store_by_pieces" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu64\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-46.c b/gcc/testsuite/gcc.target/i386/pieces-memset-46.c
new file mode 100644 (file)
index 0000000..ab7894a
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu8\[ \\t\]+\[^\n\]*%zmm" 1 } } */
+/* { dg-final { scan-assembler-times "vmovw\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-47.c b/gcc/testsuite/gcc.target/i386/pieces-memset-47.c
new file mode 100644 (file)
index 0000000..8f2c254
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids -mstore-max=128" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu(?:8|)\[ \\t\]+\[^\n\]*%xmm" 4 } } */
+/* { dg-final { scan-assembler-times "vmovw\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-48.c b/gcc/testsuite/gcc.target/i386/pieces-memset-48.c
new file mode 100644 (file)
index 0000000..9a7da96
--- /dev/null
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=sapphirerapids -mstore-max=256" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu(?:8|)\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* { dg-final { scan-assembler-times "vmovw\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pieces-memset-49.c b/gcc/testsuite/gcc.target/i386/pieces-memset-49.c
new file mode 100644 (file)
index 0000000..ad43f89
--- /dev/null
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=sapphirerapids -march=x86-64 -mavx2" } */
+
+extern char *dst;
+
+void
+foo (void)
+{
+  __builtin_memset (dst, 3, 66);
+}
+
+/* { dg-final { scan-assembler-times "vmovdqu(?:8|)\[ \\t\]+\[^\n\]*%ymm" 2 } } */
+/* No need to dynamically realign the stack here.  */
+/* { dg-final { scan-assembler-not "and\[^\n\r]*%\[re\]sp" } } */
+/* Nor use a frame pointer.  */
+/* { dg-final { scan-assembler-not "%\[re\]bp" } } */