git.ipfire.org Git - thirdparty/gcc.git/commitdiff
x86: Don't align destination for a single instruction
author H.J. Lu <hjl.tools@gmail.com>
Sat, 13 Sep 2025 13:38:44 +0000 (06:38 -0700)
committer H.J. Lu <hjl.tools@gmail.com>
Mon, 15 Sep 2025 12:02:46 +0000 (05:02 -0700)
If a single instruction can store or move the whole block of memory, use
a vector instruction and don't align the destination.

gcc/

PR target/121934
* config/i386/i386-expand.cc (ix86_expand_set_or_cpymem): If a
single instruction can store or move the whole block of memory,
use vector instruction and don't align destination.

gcc/testsuite/

PR target/121934
* gcc.target/i386/pr121934-1a.c: New test.
* gcc.target/i386/pr121934-1b.c: Likewise.
* gcc.target/i386/pr121934-2a.c: Likewise.
* gcc.target/i386/pr121934-2b.c: Likewise.
* gcc.target/i386/pr121934-3a.c: Likewise.
* gcc.target/i386/pr121934-3b.c: Likewise.
* gcc.target/i386/pr121934-4a.c: Likewise.
* gcc.target/i386/pr121934-4b.c: Likewise.
* gcc.target/i386/pr121934-5a.c: Likewise.
* gcc.target/i386/pr121934-5b.c: Likewise.

Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
gcc/config/i386/i386-expand.cc
gcc/testsuite/gcc.target/i386/pr121934-1a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-1b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-2a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-2b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-3a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-3b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-4a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-4b.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-5a.c [new file with mode: 0644]
gcc/testsuite/gcc.target/i386/pr121934-5b.c [new file with mode: 0644]

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index dc26b3452cb161381de65f5a975d4d47cf892783..b0b9e6da946930c43c4fbf9e79746f0a365a6c90 100644 (file)
@@ -9552,9 +9552,20 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   if (!issetmem)
     srcreg = ix86_copy_addr_to_reg (XEXP (src, 0));
 
+  bool aligned_dstmem = false;
+  unsigned int nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
+  bool single_insn_p = count && count <= nunits;
+  if (single_insn_p)
+    {
+      /* If it can be done with a single instruction, use vector
+        instruction and don't align destination.  */
+      alg = vector_loop;
+      noalign = true;
+      dynamic_check = -1;
+    }
+
   unroll_factor = 1;
   move_mode = word_mode;
-  int nunits;
   switch (alg)
     {
     case libcall:
@@ -9576,7 +9587,6 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
       need_zero_guard = true;
       unroll_factor = 4;
       /* Get the vector mode to move STORE_MAX_PIECES/MOVE_MAX bytes.  */
-      nunits = issetmem ? STORE_MAX_PIECES : MOVE_MAX;
       nunits /= GET_MODE_SIZE (word_mode);
       if (nunits > 1)
        {
@@ -9629,28 +9639,32 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
     }
   gcc_assert (desired_align >= 1 && align >= 1);
 
-  /* Misaligned move sequences handle both prologue and epilogue at once.
-     Default code generation results in a smaller code for large alignments
-     and also avoids redundant job when sizes are known precisely.  */
-  misaligned_prologue_used
-    = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
-       && MAX (desired_align, epilogue_size_needed) <= 32
-       && desired_align <= epilogue_size_needed
-       && ((desired_align > align && !align_bytes)
-          || (!count && epilogue_size_needed > 1)));
-
-  /* Destination is aligned after the misaligned prologue.  */
-  bool aligned_dstmem = misaligned_prologue_used;
-
-  if (noalign && !misaligned_prologue_used)
-    {
-      /* Also use misaligned prologue if alignment isn't needed and
-        destination isn't aligned.   Since alignment isn't needed,
-        the destination after prologue won't be aligned.  */
-      aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode)
-                       <= MEM_ALIGN (dst));
-      if (!aligned_dstmem)
-       misaligned_prologue_used = true;
+  if (!single_insn_p)
+    {
+      /* Misaligned move sequences handle both prologue and epilogue
+        at once.  Default code generation results in a smaller code
+        for large alignments and also avoids redundant job when sizes
+        are known precisely.  */
+      misaligned_prologue_used
+       = (TARGET_MISALIGNED_MOVE_STRING_PRO_EPILOGUES
+          && MAX (desired_align, epilogue_size_needed) <= 32
+          && desired_align <= epilogue_size_needed
+          && ((desired_align > align && !align_bytes)
+              || (!count && epilogue_size_needed > 1)));
+
+      /* Destination is aligned after the misaligned prologue.  */
+      aligned_dstmem = misaligned_prologue_used;
+
+      if (noalign && !misaligned_prologue_used)
+       {
+         /* Also use misaligned prologue if alignment isn't needed and
+            destination isn't aligned.   Since alignment isn't needed,
+            the destination after prologue won't be aligned.  */
+         aligned_dstmem = (GET_MODE_ALIGNMENT (move_mode)
+                           <= MEM_ALIGN (dst));
+         if (!aligned_dstmem)
+           misaligned_prologue_used = true;
+       }
     }
 
   /* Do the cheap promotion to allow better CSE across the
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-1a.c b/gcc/testsuite/gcc.target/i386/pr121934-1a.c
new file mode 100644 (file)
index 0000000..6b68813
--- /dev/null
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre" } */
+
+extern int f();
+int a, b, c, d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-1b.c b/gcc/testsuite/gcc.target/i386/pr121934-1b.c
new file mode 100644 (file)
index 0000000..47381ec
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-1a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-2a.c b/gcc/testsuite/gcc.target/i386/pr121934-2a.c
new file mode 100644 (file)
index 0000000..49def11
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre" } */
+
+extern int f();
+int a, b, c;
+long long int d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (long long int) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-2b.c b/gcc/testsuite/gcc.target/i386/pr121934-2b.c
new file mode 100644 (file)
index 0000000..1c634df
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-2a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-3a.c b/gcc/testsuite/gcc.target/i386/pr121934-3a.c
new file mode 100644 (file)
index 0000000..0c04b69
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -msse2" } */
+
+extern int f();
+int a, b, c;
+_BitInt(128) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(128)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-3b.c b/gcc/testsuite/gcc.target/i386/pr121934-3b.c
new file mode 100644 (file)
index 0000000..ff4b083
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -msse2 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-3a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-4a.c b/gcc/testsuite/gcc.target/i386/pr121934-4a.c
new file mode 100644 (file)
index 0000000..5aa3e06
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mno-avx512f -mavx -mprefer-vector-width=256" } */
+
+extern int f();
+int a, b, c;
+_BitInt(256) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(256)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-4b.c b/gcc/testsuite/gcc.target/i386/pr121934-4b.c
new file mode 100644 (file)
index 0000000..5f8241d
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mno-avx512f -mavx -mprefer-vector-width=256 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-4a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-5a.c b/gcc/testsuite/gcc.target/i386/pr121934-5a.c
new file mode 100644 (file)
index 0000000..10be0dd
--- /dev/null
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mavx512f -mprefer-vector-width=512" } */
+
+extern int f();
+int a, b, c;
+_BitInt(512) d[3];
+void g() {
+  int h;
+  if (f()) {
+    if (b)
+    i:
+      c > 0;
+    a = 0;
+    for (h = 0; h < 3; h++) {
+      if (a != 1)
+        __builtin_printf("0\n");
+      d[h] = (_BitInt(512)) -1;
+    }
+    goto i;
+  }
+}
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr121934-5b.c b/gcc/testsuite/gcc.target/i386/pr121934-5b.c
new file mode 100644 (file)
index 0000000..6a45a8a
--- /dev/null
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -fno-tree-dominator-opts -fno-tree-vrp -fno-tree-ccp -fno-tree-forwprop -fno-tree-pre -fno-tree-fre -mavx512f -mprefer-vector-width=512 -mmemset-strategy=rep_byte:8192:align,libcall:-1:noalign" } */
+
+#include "pr121934-5a.c"
+
+/* { dg-final { scan-assembler-not "rep stos" } } */
+/* { dg-final { scan-assembler-not "movb\[ \\t\]+\\\$-1" } } */