]>
Commit | Line | Data |
---|---|---|
28f540f4 | 1 | /* memcopy.h -- definitions for memory copy functions. Pentium version. |
f7a9f785 | 2 | Copyright (C) 1994-2016 Free Software Foundation, Inc. |
28f540f4 | 3 | Contributed by Torbjorn Granlund (tege@sics.se). |
478b92f0 UD |
4 | This file is part of the GNU C Library. |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
41bdb6e2 AJ |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
478b92f0 UD |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
41bdb6e2 | 14 | Lesser General Public License for more details. |
478b92f0 | 15 | |
41bdb6e2 | 16 | You should have received a copy of the GNU Lesser General Public |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
28f540f4 RM |
19 | |
20 | /* Get the i386 definitions. We will override some of them below. */ | |
3d42e04d | 21 | #include <sysdeps/i386/memcopy.h> |
28f540f4 RM |
22 | |
23 | /* Written like this, the Pentium pipeline can execute the loop at a | |
24 | sustained rate of 2 instructions/clock, or asymptotically 480 | |
25 | Mbytes/second at 60 MHz. */ | |
26 | ||
27 | #undef WORD_COPY_FWD | |
28 | #define WORD_COPY_FWD(dst_bp, src_bp, nbytes_left, nbytes) \ | |
29 | do \ | |
30 | { \ | |
31 | asm volatile ("subl $32,%2\n" \ | |
32 | "js 2f\n" \ | |
4ca84cff | 33 | "movl 0(%0),%%edx\n" /* alloc dest line */ \ |
28f540f4 | 34 | "1:\n" \ |
4ca84cff RM |
35 | "movl 28(%0),%%eax\n" /* alloc dest line */ \ |
36 | "subl $32,%2\n" /* decr loop count */ \ | |
37 | "movl 0(%1),%%eax\n" /* U pipe */ \ | |
38 | "movl 4(%1),%%edx\n" /* V pipe */ \ | |
39 | "movl %%eax,0(%0)\n" /* U pipe */ \ | |
40 | "movl %%edx,4(%0)\n" /* V pipe */ \ | |
28f540f4 RM |
41 | "movl 8(%1),%%eax\n" \ |
42 | "movl 12(%1),%%edx\n" \ | |
43 | "movl %%eax,8(%0)\n" \ | |
44 | "movl %%edx,12(%0)\n" \ | |
45 | "movl 16(%1),%%eax\n" \ | |
46 | "movl 20(%1),%%edx\n" \ | |
47 | "movl %%eax,16(%0)\n" \ | |
48 | "movl %%edx,20(%0)\n" \ | |
49 | "movl 24(%1),%%eax\n" \ | |
50 | "movl 28(%1),%%edx\n" \ | |
51 | "movl %%eax,24(%0)\n" \ | |
52 | "movl %%edx,28(%0)\n" \ | |
4ca84cff RM |
53 | "leal 32(%1),%1\n" /* update src ptr */ \ |
54 | "leal 32(%0),%0\n" /* update dst ptr */ \ | |
28f540f4 RM |
55 | "jns 1b\n" \ |
56 | "2: addl $32,%2" : \ | |
57 | "=r" (dst_bp), "=r" (src_bp), "=r" (nbytes_left) : \ | |
58 | "0" (dst_bp), "1" (src_bp), "2" (nbytes) : \ | |
59 | "ax", "dx"); \ | |
60 | } while (0) | |
61 | ||
62 | #undef WORD_COPY_BWD | |
63 | #define WORD_COPY_BWD(dst_ep, src_ep, nbytes_left, nbytes) \ | |
64 | do \ | |
65 | { \ | |
66 | asm volatile ("subl $32,%2\n" \ | |
67 | "js 2f\n" \ | |
4ca84cff | 68 | "movl -4(%0),%%edx\n" \ |
28f540f4 | 69 | "1:\n" \ |
4ca84cff RM |
70 | "movl -32(%0),%%eax\n" \ |
71 | "subl $32,%2\n" \ | |
28f540f4 RM |
72 | "movl -4(%1),%%eax\n" \ |
73 | "movl -8(%1),%%edx\n" \ | |
74 | "movl %%eax,-4(%0)\n" \ | |
75 | "movl %%edx,-8(%0)\n" \ | |
76 | "movl -12(%1),%%eax\n" \ | |
77 | "movl -16(%1),%%edx\n" \ | |
78 | "movl %%eax,-12(%0)\n" \ | |
79 | "movl %%edx,-16(%0)\n" \ | |
80 | "movl -20(%1),%%eax\n" \ | |
81 | "movl -24(%1),%%edx\n" \ | |
82 | "movl %%eax,-20(%0)\n" \ | |
83 | "movl %%edx,-24(%0)\n" \ | |
84 | "movl -28(%1),%%eax\n" \ | |
85 | "movl -32(%1),%%edx\n" \ | |
86 | "movl %%eax,-28(%0)\n" \ | |
87 | "movl %%edx,-32(%0)\n" \ | |
4ca84cff RM |
88 | "leal -32(%1),%1\n" \ |
89 | "leal -32(%0),%0\n" \ | |
28f540f4 RM |
90 | "jns 1b\n" \ |
91 | "2: addl $32,%2" : \ | |
8f5ca04b RM |
92 | "=r" (dst_ep), "=r" (src_ep), "=r" (nbytes_left) : \ |
93 | "0" (dst_ep), "1" (src_ep), "2" (nbytes) : \ | |
28f540f4 RM |
94 | "ax", "dx"); \ |
95 | } while (0) |