/* When X86_TUNE_AVX512_TWO_EPILOGUES is enabled, arrange for both
an AVX2 and an SSE epilogue for AVX512 vectorized loops. */
if (loop_vinfo
+ && LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+ && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32
&& ix86_tune_features[X86_TUNE_AVX512_TWO_EPILOGUES])
- {
- if (GET_MODE_SIZE (loop_vinfo->vector_mode) == 64)
- m_suggested_epilogue_mode = V32QImode;
- else if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
- && GET_MODE_SIZE (loop_vinfo->vector_mode) == 32)
- m_suggested_epilogue_mode = V16QImode;
- }
+ m_suggested_epilogue_mode = V16QImode;
/* When a 128-bit SSE vectorized epilogue still has a VF of 16 or larger,
enable a 64-bit SSE epilogue. */
if (loop_vinfo
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx2 -mno-avx512f -mtune=generic -fdump-tree-vect-optimized" } */
+
+int test (signed char *data, int n)
+{ /* Return the sum of data[0] .. data[n-1] (0 when n <= 0).  This is the
+     only loop in the testcase; the scan directives after the function
+     expect the vect dump to report 32-byte and 16-byte vectorization and,
+     except on ia32, 8-byte vectorization as well.  */
+ int sum = 0; /* int accumulator; the elements themselves are signed char.  */
+ for (int i = 0; i < n; ++i)
+ sum += data[i];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized using 32 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 8 byte vectors" "vect" { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mtune=generic -fdump-tree-vect-optimized" } */
+
+int test (signed char *data, int n)
+{ /* Return the sum of data[0] .. data[n-1] (0 when n <= 0).  This is the
+     only loop in the testcase; the scan directives after the function
+     expect the vect dump to report 64-byte and 32-byte vectorization and
+     to contain no 16-byte or 8-byte vectorization message.  */
+ int sum = 0; /* int accumulator; the elements themselves are signed char.  */
+ for (int i = 0; i < n; ++i)
+ sum += data[i];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized using 64 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 32 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump-not "loop vectorized using 16 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump-not "loop vectorized using 8 byte vectors" "vect" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mtune=znver4 -fdump-tree-vect-optimized" } */
+
+int test (signed char *data, int n)
+{ /* Return the sum of data[0] .. data[n-1] (0 when n <= 0).  This is the
+     only loop in the testcase; the scan directives after the function
+     expect the vect dump to report 64-byte, 32-byte and 16-byte
+     vectorization and, except on ia32, 8-byte vectorization as well.  */
+ int sum = 0; /* int accumulator; the elements themselves are signed char.  */
+ for (int i = 0; i < n; ++i)
+ sum += data[i];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump "loop vectorized using 64 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 32 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 16 byte vectors" "vect" } } */
+/* { dg-final { scan-tree-dump "loop vectorized using 8 byte vectors" "vect" { target { ! ia32 } } } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mtune=generic --param vect-partial-vector-usage=1 -fdump-tree-vect-optimized" } */
+
+int test (signed char *data, int n)
+{ /* Return the sum of data[0] .. data[n-1] (0 when n <= 0).  This is the
+     only loop in the testcase, which is compiled with
+     --param vect-partial-vector-usage=1; the scan directives after the
+     function expect the 64-byte vectorization message exactly twice and no
+     32-byte vectorization message in the vect dump.  */
+ int sum = 0; /* int accumulator; the elements themselves are signed char.  */
+ for (int i = 0; i < n; ++i)
+ sum += data[i];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-not "loop vectorized using 32 byte vectors" "vect" } } */
--- /dev/null
+/* { dg-do compile } */
+/* { dg-options "-O3 -mavx512bw -mtune=znver4 --param vect-partial-vector-usage=1 -fdump-tree-vect-optimized" } */
+
+int test (signed char *data, int n)
+{ /* Return the sum of data[0] .. data[n-1] (0 when n <= 0).  This is the
+     only loop in the testcase, which is compiled with -mtune=znver4 and
+     --param vect-partial-vector-usage=1; the scan directives after the
+     function expect the 64-byte vectorization message exactly twice and no
+     32-byte vectorization message in the vect dump.  */
+ int sum = 0; /* int accumulator; the elements themselves are signed char.  */
+ for (int i = 0; i < n; ++i)
+ sum += data[i];
+ return sum;
+}
+
+/* { dg-final { scan-tree-dump-times "loop vectorized using 64 byte vectors" 2 "vect" } } */
+/* { dg-final { scan-tree-dump-not "loop vectorized using 32 byte vectors" "vect" } } */