git.ipfire.org Git - thirdparty/glibc.git/blobdiff - sysdeps/i386/i586/memcopy.h
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / sysdeps / i386 / i586 / memcopy.h
index 0a8768788e82d128d8673ee5a0bcaa7db0757866..43b9689110943434140a0169db2aa6faa4f3dd84 100644 (file)
@@ -1,27 +1,24 @@
 /* memcopy.h -- definitions for memory copy functions.  Pentium version.
-   Copyright (C) 1994, 1995 Free Software Foundation, Inc.
+   Copyright (C) 1994-2019 Free Software Foundation, Inc.
    Contributed by Torbjorn Granlund (tege@sics.se).
+   This file is part of the GNU C Library.
 
-This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
 
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
 
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-Library General Public License for more details.
-
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB.  If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA.  */
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
 
 /* Get the i386 definitions.  We will override some of them below.  */
-#include_next <memcopy.h>
-
+#include <sysdeps/i386/memcopy.h>
 
 /* Written like this, the Pentium pipeline can execute the loop at a
    sustained rate of 2 instructions/clock, or asymptotically 480
@@ -33,11 +30,14 @@ Cambridge, MA 02139, USA.  */
     {                                                                  \
       asm volatile ("subl      $32,%2\n"                               \
                    "js         2f\n"                                   \
+                   "movl       0(%0),%%edx\n"  /* alloc dest line */   \
                    "1:\n"                                              \
-                   "movl       0(%1),%%eax\n"                          \
-                   "movl       4(%1),%%edx\n"                          \
-                   "movl       %%eax,0(%0)\n"                          \
-                   "movl       %%edx,4(%0)\n"                          \
+                   "movl       28(%0),%%eax\n" /* alloc dest line */   \
+                   "subl       $32,%2\n"       /* decr loop count */   \
+                   "movl       0(%1),%%eax\n"  /* U pipe */            \
+                   "movl       4(%1),%%edx\n"  /* V pipe */            \
+                   "movl       %%eax,0(%0)\n"  /* U pipe */            \
+                   "movl       %%edx,4(%0)\n"  /* V pipe */            \
                    "movl       8(%1),%%eax\n"                          \
                    "movl       12(%1),%%edx\n"                         \
                    "movl       %%eax,8(%0)\n"                          \
@@ -50,9 +50,8 @@ Cambridge, MA 02139, USA.  */
                    "movl       28(%1),%%edx\n"                         \
                    "movl       %%eax,24(%0)\n"                         \
                    "movl       %%edx,28(%0)\n"                         \
-                   "addl       $32,%1\n"                               \
-                   "addl       $32,%0\n"                               \
-                   "subl       $32,%2\n"                               \
+                   "leal       32(%1),%1\n"    /* update src ptr */    \
+                   "leal       32(%0),%0\n"    /* update dst ptr */    \
                    "jns        1b\n"                                   \
                    "2: addl    $32,%2" :                               \
                    "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) :  \
@@ -66,7 +65,10 @@ Cambridge, MA 02139, USA.  */
     {                                                                  \
       asm volatile ("subl      $32,%2\n"                               \
                    "js         2f\n"                                   \
+                   "movl       -4(%0),%%edx\n"                         \
                    "1:\n"                                              \
+                   "movl       -32(%0),%%eax\n"                        \
+                   "subl       $32,%2\n"                               \
                    "movl       -4(%1),%%eax\n"                         \
                    "movl       -8(%1),%%edx\n"                         \
                    "movl       %%eax,-4(%0)\n"                         \
@@ -83,9 +85,8 @@ Cambridge, MA 02139, USA.  */
                    "movl       -32(%1),%%edx\n"                        \
                    "movl       %%eax,-28(%0)\n"                        \
                    "movl       %%edx,-32(%0)\n"                        \
-                   "subl       $32,%1\n"                               \
-                   "subl       $32,%0\n"                               \
-                   "subl       $32,%2\n"                               \
+                   "leal       -32(%1),%1\n"                           \
+                   "leal       -32(%0),%0\n"                           \
                    "jns        1b\n"                                   \
                    "2: addl    $32,%2" :                               \
                    "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) :  \