]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / memcpy-ssse3-back.S
1 /* memcpy with SSSE3 and REP string
2 Copyright (C) 2010-2018 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21
22 #if IS_IN (libc)
23
24 #include "asm-syntax.h"
25
26 #ifndef MEMCPY
27 # define MEMCPY __memcpy_ssse3_back
28 # define MEMCPY_CHK __memcpy_chk_ssse3_back
29 # define MEMPCPY __mempcpy_ssse3_back
30 # define MEMPCPY_CHK __mempcpy_chk_ssse3_back
31 #endif
32
/* Jump-table entry: the distance from the table base label B to the
   target label I.  */
33 #define JMPTBL(I, B) I - B
34
35 /* Branch to an entry in a jump table. TABLE is a jump table with
36 relative offsets. INDEX is a register that contains the index into the
37 jump table. SCALE is the scale of INDEX.
   The macro loads the address of TABLE into %r11, sign-extends the
   32-bit entry at TABLE + INDEX * SCALE, adds it to the TABLE address
   and jumps there, so both %r11 and INDEX are overwritten.  The ud2
   after the unconditional jmp is never reached by fall-through
   (presumably it is there to stop speculation past the indirect
   jump -- confirm).  */
38 #define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
39 lea TABLE(%rip), %r11; \
40 movslq (%r11, INDEX, SCALE), INDEX; \
41 lea (%r11, INDEX), INDEX; \
42 jmp *INDEX; \
43 ud2
44
45 .section .text.ssse3,"ax",@progbits
46 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Jumps to __chk_fail when %rcx is below %rdx
   (unsigned compare); presumably %rcx is the destination object size
   and %rdx the copy length -- confirm against the __mempcpy_chk
   prototype.  There is no ret or jmp after the check, so a passing
   call continues into whatever is assembled next, here the MEMPCPY
   entry.  */
47 ENTRY (MEMPCPY_CHK)
48 cmpq %rdx, %rcx /* flags from %rcx - %rdx */
49 jb HIDDEN_JUMPTARGET (__chk_fail) /* taken when %rcx < %rdx */
50 END (MEMPCPY_CHK)
51
/* mempcpy entry used when this file is built as plain memcpy.  Sets
   the return value to %rdi + %rdx (destination plus length) and then
   joins the shared copy code at L(start) inside MEMCPY.  */
52 ENTRY (MEMPCPY)
53 movq %rdi, %rax /* %rax = destination */
54 addq %rdx, %rax /* %rax = destination + length */
55 jmp L(start)
56 END (MEMPCPY)
57 #endif
58
59 #if !defined USE_AS_BCOPY
/* Checked memcpy entry.  Jumps to __chk_fail when %rcx is below %rdx
   (unsigned compare); presumably %rcx is the destination object size
   and %rdx the copy length -- confirm against the __memcpy_chk
   prototype.  There is no ret or jmp after the check, so a passing
   call continues into whatever is assembled next, here the MEMCPY
   entry.  */
60 ENTRY (MEMCPY_CHK)
61 cmpq %rdx, %rcx /* flags from %rcx - %rdx */
62 jb HIDDEN_JUMPTARGET (__chk_fail) /* taken when %rcx < %rdx */
63 END (MEMCPY_CHK)
64 #endif
65
/* Main entry: %rdi = destination, %rsi = source, %rdx = length (SysV
   AMD64 argument order).  %rax receives the return value up front.
   Lengths below 144 are finished by one jump through
   L(table_144_bytes_fwd) or L(table_144_bytes_bwd), indexed by the
   length; both tables, and L(bwd_write_0bytes), are defined outside
   this excerpt.  Longer copies go to L(144bytesormore).  */
66 ENTRY (MEMCPY)
67 mov %rdi, %rax /* return value = destination */
68 #ifdef USE_AS_MEMPCPY
69 add %rdx, %rax /* mempcpy returns destination + length */
70 #endif
71
72 #ifdef USE_AS_MEMMOVE
73 cmp %rsi, %rdi /* flags from %rdi - %rsi */
74 jb L(copy_forward) /* destination below source: copy forward */
75 je L(bwd_write_0bytes) /* destination == source */
76 cmp $144, %rdx
77 jae L(copy_backward) /* destination above source, length >= 144 */
78 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
79 L(copy_forward):
80 #endif
81 L(start):
82 cmp $144, %rdx
83 jae L(144bytesormore)
84
/* NOTE(review): the label says "less32bytes" but the check above sends
   every length below 144 here.  */
85 L(fwd_write_less32bytes):
86 #ifndef USE_AS_MEMMOVE
87 cmp %dil, %sil /* compares only the low bytes of the two pointers */
88 jbe L(bk_write) /* taken when %sil <= %dil (unsigned) */
89 #endif
90 add %rdx, %rsi /* the forward table is entered with both pointers */
91 add %rdx, %rdi /* advanced by the length */
92 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
93 #ifndef USE_AS_MEMMOVE
94 L(bk_write):
95
/* The backward table is entered with the pointers left at the start.  */
96 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
97 #endif
98
/* Forward setup for lengths >= 144.  Saves the first 16 source bytes
   in %xmm0 and the original destination in %r8 (the copy loops store
   that head later with movdqu %xmm0, (%r8)), rounds %rdi up to the
   next 16-byte boundary and moves %rsi and %rdx by the same 1..16
   bytes.  The source misalignment that is left (%rsi & 15) then picks
   the copy loop through L(shl_table_fwd).  L(gobble_mem_fwd) and the
   table are defined outside this excerpt.  */
99 .p2align 4
100 L(144bytesormore):
101
102 #ifndef USE_AS_MEMMOVE
103 cmp %dil, %sil /* compares only the low bytes of the two pointers */
/* NOTE(review): signed jle here, unsigned jbe on the same compare in the
   path for lengths below 144 -- confirm the difference is intended.  */
104 jle L(copy_backward) /* taken when %sil <= %dil (signed) */
105 #endif
106 movdqu (%rsi), %xmm0 /* first 16 source bytes, unaligned load */
107 mov %rdi, %r8 /* %r8 = original destination */
108 and $-16, %rdi
109 add $16, %rdi /* %rdi = next 16-byte boundary above the original */
110 mov %rdi, %r9
111 sub %r8, %r9 /* %r9 = bytes skipped at the head (1..16) */
112 sub %r9, %rdx /* length left after the head */
113 add %r9, %rsi /* source moved by the same amount */
114 mov %rsi, %r9
115 and $0xf, %r9 /* %r9 = source offset within its 16-byte block */
116 jz L(shl_0) /* source is 16-byte aligned as well */
117 #ifdef DATA_CACHE_SIZE
118 mov $DATA_CACHE_SIZE, %RCX_LP
119 #else
120 mov __x86_data_cache_size(%rip), %RCX_LP
121 #endif
122 cmp %rcx, %rdx
123 jae L(gobble_mem_fwd) /* length >= data cache size */
124 lea L(shl_table_fwd)(%rip), %r11
125 sub $0x80, %rdx /* bias the count by one 0x80-byte pass */
126 movslq (%r11, %r9, 4), %r9 /* 32-bit table entry, relative to the table */
127 add %r11, %r9
128 jmp *%r9
129 ud2
130
/* Backward setup for lengths >= 144.  Lengths above twice the data
   cache size go to L(gobble_mem_bwd).  Otherwise both pointers are
   moved to the end of their buffers, the last 16 source bytes are
   saved in %xmm0 and their destination address in %r8 (the copy loops
   store that tail later with movdqu %xmm0, (%r8)), and %rdi is rounded
   down to a 16-byte boundary with %rsi and %rdx moved by the same
   0..15 bytes.  The source misalignment that is left (%rsi & 15) then
   picks the copy loop through L(shl_table_bwd).  L(gobble_mem_bwd) and
   the table are defined outside this excerpt.  */
131 .p2align 4
132 L(copy_backward):
133 #ifdef DATA_CACHE_SIZE
134 mov $DATA_CACHE_SIZE, %RCX_LP
135 #else
136 mov __x86_data_cache_size(%rip), %RCX_LP
137 #endif
138 shl $1, %rcx /* %rcx = 2 * data cache size */
139 cmp %rcx, %rdx
140 ja L(gobble_mem_bwd) /* length > 2 * data cache size */
141
142 add %rdx, %rdi /* %rdi = one past the end of the destination */
143 add %rdx, %rsi /* %rsi = one past the end of the source */
144 movdqu -16(%rsi), %xmm0 /* last 16 source bytes, unaligned load */
145 lea -16(%rdi), %r8 /* %r8 = where those 16 bytes will be stored */
146 mov %rdi, %r9
147 and $0xf, %r9 /* %r9 = low four bits of the destination end */
148 xor %r9, %rdi /* clear them: %rdi rounded down to 16 bytes */
149 sub %r9, %rsi /* source end moved by the same amount */
150 sub %r9, %rdx /* length left before the tail */
151 mov %rsi, %r9
152 and $0xf, %r9 /* %r9 = source offset within its 16-byte block */
153 jz L(shl_0_bwd) /* source end is 16-byte aligned as well */
154 lea L(shl_table_bwd)(%rip), %r11
155 sub $0x80, %rdx /* bias the count by one 0x80-byte pass */
156 movslq (%r11, %r9, 4), %r9 /* 32-bit table entry, relative to the table */
157 add %r11, %r9
158 jmp *%r9
159 ud2
160
/* Forward copy when source and destination are both 16-byte aligned
   (entered from L(144bytesormore) when %rsi & 15 is zero).  If
   %rdx + %rdx / 256 reaches half the data cache size the copy is
   handed to L(gobble_mem_fwd), which is defined outside this excerpt.
   Otherwise L(shl_0_loop) moves 0x80 bytes per pass with aligned loads
   and stores.  */
161 .p2align 4
162 L(shl_0):
163
164 mov %rdx, %r9
165 shr $8, %r9
166 add %rdx, %r9 /* %r9 = %rdx + %rdx / 256 */
167 #ifdef DATA_CACHE_SIZE
168 cmp $DATA_CACHE_SIZE_HALF, %R9_LP
169 #else
170 cmp __x86_data_cache_size_half(%rip), %R9_LP
171 #endif
172 jae L(gobble_mem_fwd)
173 sub $0x80, %rdx /* bias the count by one 0x80-byte pass */
174 .p2align 4
175 L(shl_0_loop):
176 movdqa (%rsi), %xmm1
177 movdqa %xmm1, (%rdi)
178 movaps 0x10(%rsi), %xmm2
179 movaps %xmm2, 0x10(%rdi)
180 movaps 0x20(%rsi), %xmm3
181 movaps %xmm3, 0x20(%rdi)
182 movaps 0x30(%rsi), %xmm4
183 movaps %xmm4, 0x30(%rdi)
184 movaps 0x40(%rsi), %xmm1
185 movaps %xmm1, 0x40(%rdi)
186 movaps 0x50(%rsi), %xmm2
187 movaps %xmm2, 0x50(%rdi)
188 movaps 0x60(%rsi), %xmm3
189 movaps %xmm3, 0x60(%rdi)
190 movaps 0x70(%rsi), %xmm4
191 movaps %xmm4, 0x70(%rdi)
192 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
193 lea 0x80(%rsi), %rsi
194 lea 0x80(%rdi), %rdi
195 jae L(shl_0_loop) /* no borrow: at least 0x80 bytes remain */
196 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
197 add $0x80, %rdx /* %rdx = number of bytes still to copy */
198 add %rdx, %rsi /* advance both pointers to the end of the remaining bytes */
199 add %rdx, %rdi
200 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
201
/* Backward copy when the source end and destination end are both
   16-byte aligned (entered from L(copy_backward) when %rsi & 15 is
   zero).  %rsi and %rdi point just past the bytes still to copy;
   L(copy_backward_loop) moves 0x80 bytes per pass, highest address
   first, with aligned loads and stores.  */
202 .p2align 4
203 L(shl_0_bwd):
204 sub $0x80, %rdx /* bias the count by one 0x80-byte pass */
205 L(copy_backward_loop):
206 movaps -0x10(%rsi), %xmm1
207 movaps %xmm1, -0x10(%rdi)
208 movaps -0x20(%rsi), %xmm2
209 movaps %xmm2, -0x20(%rdi)
210 movaps -0x30(%rsi), %xmm3
211 movaps %xmm3, -0x30(%rdi)
212 movaps -0x40(%rsi), %xmm4
213 movaps %xmm4, -0x40(%rdi)
214 movaps -0x50(%rsi), %xmm5
215 movaps %xmm5, -0x50(%rdi)
216 movaps -0x60(%rsi), %xmm5
217 movaps %xmm5, -0x60(%rdi)
218 movaps -0x70(%rsi), %xmm5
219 movaps %xmm5, -0x70(%rdi)
220 movaps -0x80(%rsi), %xmm5
221 movaps %xmm5, -0x80(%rdi)
222 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
223 lea -0x80(%rdi), %rdi
224 lea -0x80(%rsi), %rsi
225 jae L(copy_backward_loop) /* no borrow: at least 0x80 bytes remain */
226
227 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
228 add $0x80, %rdx /* %rdx = number of bytes still to copy */
229 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
230 sub %rdx, %rsi
231 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
232
/* Forward 0x80-byte loop for a source 1 byte past 16-byte alignment.
   The movaps loads need %rsi - 1 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 1 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $1 into one output
   block.  */
233 .p2align 4
234 L(shl_1):
235 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
236 movaps -0x01(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
237 movaps 0x0f(%rsi), %xmm2
238 movaps 0x1f(%rsi), %xmm3
239 movaps 0x2f(%rsi), %xmm4
240 movaps 0x3f(%rsi), %xmm5
241 movaps 0x4f(%rsi), %xmm6
242 movaps 0x5f(%rsi), %xmm7
243 movaps 0x6f(%rsi), %xmm8
244 movaps 0x7f(%rsi), %xmm9
245 lea 0x80(%rsi), %rsi
246 palignr $1, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
247 movaps %xmm9, 0x70(%rdi)
248 palignr $1, %xmm7, %xmm8
249 movaps %xmm8, 0x60(%rdi)
250 palignr $1, %xmm6, %xmm7
251 movaps %xmm7, 0x50(%rdi)
252 palignr $1, %xmm5, %xmm6
253 movaps %xmm6, 0x40(%rdi)
254 palignr $1, %xmm4, %xmm5
255 movaps %xmm5, 0x30(%rdi)
256 palignr $1, %xmm3, %xmm4
257 movaps %xmm4, 0x20(%rdi)
258 palignr $1, %xmm2, %xmm3
259 movaps %xmm3, 0x10(%rdi)
260 palignr $1, %xmm1, %xmm2
261 movaps %xmm2, (%rdi)
262 lea 0x80(%rdi), %rdi
263 jae L(shl_1) /* no borrow: at least 0x80 bytes remain after this pass */
264 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
265 add $0x80, %rdx /* %rdx = number of bytes still to copy */
266 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
267 add %rdx, %rsi
268 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
269
/* Backward 0x80-byte loop for a source end 1 byte past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 1 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 1 (table not shown in
   this excerpt).  */
270 .p2align 4
271 L(shl_1_bwd):
272 movaps -0x01(%rsi), %xmm1 /* aligned block that holds the 1 byte just below %rsi */
273
274 movaps -0x11(%rsi), %xmm2
275 palignr $1, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
276 movaps %xmm1, -0x10(%rdi)
277
278 movaps -0x21(%rsi), %xmm3
279 palignr $1, %xmm3, %xmm2
280 movaps %xmm2, -0x20(%rdi)
281
282 movaps -0x31(%rsi), %xmm4
283 palignr $1, %xmm4, %xmm3
284 movaps %xmm3, -0x30(%rdi)
285
286 movaps -0x41(%rsi), %xmm5
287 palignr $1, %xmm5, %xmm4
288 movaps %xmm4, -0x40(%rdi)
289
290 movaps -0x51(%rsi), %xmm6
291 palignr $1, %xmm6, %xmm5
292 movaps %xmm5, -0x50(%rdi)
293
294 movaps -0x61(%rsi), %xmm7
295 palignr $1, %xmm7, %xmm6
296 movaps %xmm6, -0x60(%rdi)
297
298 movaps -0x71(%rsi), %xmm8
299 palignr $1, %xmm8, %xmm7
300 movaps %xmm7, -0x70(%rdi)
301
302 movaps -0x81(%rsi), %xmm9
303 palignr $1, %xmm9, %xmm8
304 movaps %xmm8, -0x80(%rdi)
305
306 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
307 lea -0x80(%rdi), %rdi
308 lea -0x80(%rsi), %rsi
309 jae L(shl_1_bwd) /* no borrow: at least 0x80 bytes remain */
310 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
311 add $0x80, %rdx /* %rdx = number of bytes still to copy */
312 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
313 sub %rdx, %rsi
314 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
315
/* Forward 0x80-byte loop for a source 2 bytes past 16-byte alignment.
   The movaps loads need %rsi - 2 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 2 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $2 into one output
   block.  */
316 .p2align 4
317 L(shl_2):
318 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
319 movaps -0x02(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
320 movaps 0x0e(%rsi), %xmm2
321 movaps 0x1e(%rsi), %xmm3
322 movaps 0x2e(%rsi), %xmm4
323 movaps 0x3e(%rsi), %xmm5
324 movaps 0x4e(%rsi), %xmm6
325 movaps 0x5e(%rsi), %xmm7
326 movaps 0x6e(%rsi), %xmm8
327 movaps 0x7e(%rsi), %xmm9
328 lea 0x80(%rsi), %rsi
329 palignr $2, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
330 movaps %xmm9, 0x70(%rdi)
331 palignr $2, %xmm7, %xmm8
332 movaps %xmm8, 0x60(%rdi)
333 palignr $2, %xmm6, %xmm7
334 movaps %xmm7, 0x50(%rdi)
335 palignr $2, %xmm5, %xmm6
336 movaps %xmm6, 0x40(%rdi)
337 palignr $2, %xmm4, %xmm5
338 movaps %xmm5, 0x30(%rdi)
339 palignr $2, %xmm3, %xmm4
340 movaps %xmm4, 0x20(%rdi)
341 palignr $2, %xmm2, %xmm3
342 movaps %xmm3, 0x10(%rdi)
343 palignr $2, %xmm1, %xmm2
344 movaps %xmm2, (%rdi)
345 lea 0x80(%rdi), %rdi
346 jae L(shl_2) /* no borrow: at least 0x80 bytes remain after this pass */
347 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
348 add $0x80, %rdx /* %rdx = number of bytes still to copy */
349 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
350 add %rdx, %rsi
351 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
352
/* Backward 0x80-byte loop for a source end 2 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 2 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 2 (table not shown in
   this excerpt).  */
353 .p2align 4
354 L(shl_2_bwd):
355 movaps -0x02(%rsi), %xmm1 /* aligned block that holds the 2 bytes just below %rsi */
356
357 movaps -0x12(%rsi), %xmm2
358 palignr $2, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
359 movaps %xmm1, -0x10(%rdi)
360
361 movaps -0x22(%rsi), %xmm3
362 palignr $2, %xmm3, %xmm2
363 movaps %xmm2, -0x20(%rdi)
364
365 movaps -0x32(%rsi), %xmm4
366 palignr $2, %xmm4, %xmm3
367 movaps %xmm3, -0x30(%rdi)
368
369 movaps -0x42(%rsi), %xmm5
370 palignr $2, %xmm5, %xmm4
371 movaps %xmm4, -0x40(%rdi)
372
373 movaps -0x52(%rsi), %xmm6
374 palignr $2, %xmm6, %xmm5
375 movaps %xmm5, -0x50(%rdi)
376
377 movaps -0x62(%rsi), %xmm7
378 palignr $2, %xmm7, %xmm6
379 movaps %xmm6, -0x60(%rdi)
380
381 movaps -0x72(%rsi), %xmm8
382 palignr $2, %xmm8, %xmm7
383 movaps %xmm7, -0x70(%rdi)
384
385 movaps -0x82(%rsi), %xmm9
386 palignr $2, %xmm9, %xmm8
387 movaps %xmm8, -0x80(%rdi)
388
389 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
390 lea -0x80(%rdi), %rdi
391 lea -0x80(%rsi), %rsi
392 jae L(shl_2_bwd) /* no borrow: at least 0x80 bytes remain */
393 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
394 add $0x80, %rdx /* %rdx = number of bytes still to copy */
395 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
396 sub %rdx, %rsi
397 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
398
/* Forward 0x80-byte loop for a source 3 bytes past 16-byte alignment.
   The movaps loads need %rsi - 3 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 3 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $3 into one output
   block.  */
399 .p2align 4
400 L(shl_3):
401 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
402 movaps -0x03(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
403 movaps 0x0d(%rsi), %xmm2
404 movaps 0x1d(%rsi), %xmm3
405 movaps 0x2d(%rsi), %xmm4
406 movaps 0x3d(%rsi), %xmm5
407 movaps 0x4d(%rsi), %xmm6
408 movaps 0x5d(%rsi), %xmm7
409 movaps 0x6d(%rsi), %xmm8
410 movaps 0x7d(%rsi), %xmm9
411 lea 0x80(%rsi), %rsi
412 palignr $3, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
413 movaps %xmm9, 0x70(%rdi)
414 palignr $3, %xmm7, %xmm8
415 movaps %xmm8, 0x60(%rdi)
416 palignr $3, %xmm6, %xmm7
417 movaps %xmm7, 0x50(%rdi)
418 palignr $3, %xmm5, %xmm6
419 movaps %xmm6, 0x40(%rdi)
420 palignr $3, %xmm4, %xmm5
421 movaps %xmm5, 0x30(%rdi)
422 palignr $3, %xmm3, %xmm4
423 movaps %xmm4, 0x20(%rdi)
424 palignr $3, %xmm2, %xmm3
425 movaps %xmm3, 0x10(%rdi)
426 palignr $3, %xmm1, %xmm2
427 movaps %xmm2, (%rdi)
428 lea 0x80(%rdi), %rdi
429 jae L(shl_3) /* no borrow: at least 0x80 bytes remain after this pass */
430 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
431 add $0x80, %rdx /* %rdx = number of bytes still to copy */
432 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
433 add %rdx, %rsi
434 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
435
/* Backward 0x80-byte loop for a source end 3 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 3 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 3 (table not shown in
   this excerpt).  */
436 .p2align 4
437 L(shl_3_bwd):
438 movaps -0x03(%rsi), %xmm1 /* aligned block that holds the 3 bytes just below %rsi */
439
440 movaps -0x13(%rsi), %xmm2
441 palignr $3, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
442 movaps %xmm1, -0x10(%rdi)
443
444 movaps -0x23(%rsi), %xmm3
445 palignr $3, %xmm3, %xmm2
446 movaps %xmm2, -0x20(%rdi)
447
448 movaps -0x33(%rsi), %xmm4
449 palignr $3, %xmm4, %xmm3
450 movaps %xmm3, -0x30(%rdi)
451
452 movaps -0x43(%rsi), %xmm5
453 palignr $3, %xmm5, %xmm4
454 movaps %xmm4, -0x40(%rdi)
455
456 movaps -0x53(%rsi), %xmm6
457 palignr $3, %xmm6, %xmm5
458 movaps %xmm5, -0x50(%rdi)
459
460 movaps -0x63(%rsi), %xmm7
461 palignr $3, %xmm7, %xmm6
462 movaps %xmm6, -0x60(%rdi)
463
464 movaps -0x73(%rsi), %xmm8
465 palignr $3, %xmm8, %xmm7
466 movaps %xmm7, -0x70(%rdi)
467
468 movaps -0x83(%rsi), %xmm9
469 palignr $3, %xmm9, %xmm8
470 movaps %xmm8, -0x80(%rdi)
471
472 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
473 lea -0x80(%rdi), %rdi
474 lea -0x80(%rsi), %rsi
475 jae L(shl_3_bwd) /* no borrow: at least 0x80 bytes remain */
476 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
477 add $0x80, %rdx /* %rdx = number of bytes still to copy */
478 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
479 sub %rdx, %rsi
480 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
481
/* Forward 0x80-byte loop for a source 4 bytes past 16-byte alignment.
   The movaps loads need %rsi - 4 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 4 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $4 into one output
   block.  */
482 .p2align 4
483 L(shl_4):
484 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
485 movaps -0x04(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
486 movaps 0x0c(%rsi), %xmm2
487 movaps 0x1c(%rsi), %xmm3
488 movaps 0x2c(%rsi), %xmm4
489 movaps 0x3c(%rsi), %xmm5
490 movaps 0x4c(%rsi), %xmm6
491 movaps 0x5c(%rsi), %xmm7
492 movaps 0x6c(%rsi), %xmm8
493 movaps 0x7c(%rsi), %xmm9
494 lea 0x80(%rsi), %rsi
495 palignr $4, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
496 movaps %xmm9, 0x70(%rdi)
497 palignr $4, %xmm7, %xmm8
498 movaps %xmm8, 0x60(%rdi)
499 palignr $4, %xmm6, %xmm7
500 movaps %xmm7, 0x50(%rdi)
501 palignr $4, %xmm5, %xmm6
502 movaps %xmm6, 0x40(%rdi)
503 palignr $4, %xmm4, %xmm5
504 movaps %xmm5, 0x30(%rdi)
505 palignr $4, %xmm3, %xmm4
506 movaps %xmm4, 0x20(%rdi)
507 palignr $4, %xmm2, %xmm3
508 movaps %xmm3, 0x10(%rdi)
509 palignr $4, %xmm1, %xmm2
510 movaps %xmm2, (%rdi)
511 lea 0x80(%rdi), %rdi
512 jae L(shl_4) /* no borrow: at least 0x80 bytes remain after this pass */
513 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
514 add $0x80, %rdx /* %rdx = number of bytes still to copy */
515 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
516 add %rdx, %rsi
517 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
518
/* Backward 0x80-byte loop for a source end 4 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 4 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 4 (table not shown in
   this excerpt).  */
519 .p2align 4
520 L(shl_4_bwd):
521 movaps -0x04(%rsi), %xmm1 /* aligned block that holds the 4 bytes just below %rsi */
522
523 movaps -0x14(%rsi), %xmm2
524 palignr $4, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
525 movaps %xmm1, -0x10(%rdi)
526
527 movaps -0x24(%rsi), %xmm3
528 palignr $4, %xmm3, %xmm2
529 movaps %xmm2, -0x20(%rdi)
530
531 movaps -0x34(%rsi), %xmm4
532 palignr $4, %xmm4, %xmm3
533 movaps %xmm3, -0x30(%rdi)
534
535 movaps -0x44(%rsi), %xmm5
536 palignr $4, %xmm5, %xmm4
537 movaps %xmm4, -0x40(%rdi)
538
539 movaps -0x54(%rsi), %xmm6
540 palignr $4, %xmm6, %xmm5
541 movaps %xmm5, -0x50(%rdi)
542
543 movaps -0x64(%rsi), %xmm7
544 palignr $4, %xmm7, %xmm6
545 movaps %xmm6, -0x60(%rdi)
546
547 movaps -0x74(%rsi), %xmm8
548 palignr $4, %xmm8, %xmm7
549 movaps %xmm7, -0x70(%rdi)
550
551 movaps -0x84(%rsi), %xmm9
552 palignr $4, %xmm9, %xmm8
553 movaps %xmm8, -0x80(%rdi)
554
555 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
556 lea -0x80(%rdi), %rdi
557 lea -0x80(%rsi), %rsi
558 jae L(shl_4_bwd) /* no borrow: at least 0x80 bytes remain */
559 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
560 add $0x80, %rdx /* %rdx = number of bytes still to copy */
561 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
562 sub %rdx, %rsi
563 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
564
/* Forward 0x80-byte loop for a source 5 bytes past 16-byte alignment.
   The movaps loads need %rsi - 5 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 5 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $5 into one output
   block.  */
565 .p2align 4
566 L(shl_5):
567 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
568 movaps -0x05(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
569 movaps 0x0b(%rsi), %xmm2
570 movaps 0x1b(%rsi), %xmm3
571 movaps 0x2b(%rsi), %xmm4
572 movaps 0x3b(%rsi), %xmm5
573 movaps 0x4b(%rsi), %xmm6
574 movaps 0x5b(%rsi), %xmm7
575 movaps 0x6b(%rsi), %xmm8
576 movaps 0x7b(%rsi), %xmm9
577 lea 0x80(%rsi), %rsi
578 palignr $5, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
579 movaps %xmm9, 0x70(%rdi)
580 palignr $5, %xmm7, %xmm8
581 movaps %xmm8, 0x60(%rdi)
582 palignr $5, %xmm6, %xmm7
583 movaps %xmm7, 0x50(%rdi)
584 palignr $5, %xmm5, %xmm6
585 movaps %xmm6, 0x40(%rdi)
586 palignr $5, %xmm4, %xmm5
587 movaps %xmm5, 0x30(%rdi)
588 palignr $5, %xmm3, %xmm4
589 movaps %xmm4, 0x20(%rdi)
590 palignr $5, %xmm2, %xmm3
591 movaps %xmm3, 0x10(%rdi)
592 palignr $5, %xmm1, %xmm2
593 movaps %xmm2, (%rdi)
594 lea 0x80(%rdi), %rdi
595 jae L(shl_5) /* no borrow: at least 0x80 bytes remain after this pass */
596 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
597 add $0x80, %rdx /* %rdx = number of bytes still to copy */
598 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
599 add %rdx, %rsi
600 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
601
/* Backward 0x80-byte loop for a source end 5 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 5 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 5 (table not shown in
   this excerpt).  */
602 .p2align 4
603 L(shl_5_bwd):
604 movaps -0x05(%rsi), %xmm1 /* aligned block that holds the 5 bytes just below %rsi */
605
606 movaps -0x15(%rsi), %xmm2
607 palignr $5, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
608 movaps %xmm1, -0x10(%rdi)
609
610 movaps -0x25(%rsi), %xmm3
611 palignr $5, %xmm3, %xmm2
612 movaps %xmm2, -0x20(%rdi)
613
614 movaps -0x35(%rsi), %xmm4
615 palignr $5, %xmm4, %xmm3
616 movaps %xmm3, -0x30(%rdi)
617
618 movaps -0x45(%rsi), %xmm5
619 palignr $5, %xmm5, %xmm4
620 movaps %xmm4, -0x40(%rdi)
621
622 movaps -0x55(%rsi), %xmm6
623 palignr $5, %xmm6, %xmm5
624 movaps %xmm5, -0x50(%rdi)
625
626 movaps -0x65(%rsi), %xmm7
627 palignr $5, %xmm7, %xmm6
628 movaps %xmm6, -0x60(%rdi)
629
630 movaps -0x75(%rsi), %xmm8
631 palignr $5, %xmm8, %xmm7
632 movaps %xmm7, -0x70(%rdi)
633
634 movaps -0x85(%rsi), %xmm9
635 palignr $5, %xmm9, %xmm8
636 movaps %xmm8, -0x80(%rdi)
637
638 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
639 lea -0x80(%rdi), %rdi
640 lea -0x80(%rsi), %rsi
641 jae L(shl_5_bwd) /* no borrow: at least 0x80 bytes remain */
642 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
643 add $0x80, %rdx /* %rdx = number of bytes still to copy */
644 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
645 sub %rdx, %rsi
646 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
647
/* Forward 0x80-byte loop for a source 6 bytes past 16-byte alignment.
   The movaps loads need %rsi - 6 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 6 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $6 into one output
   block.  */
648 .p2align 4
649 L(shl_6):
650 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
651 movaps -0x06(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
652 movaps 0x0a(%rsi), %xmm2
653 movaps 0x1a(%rsi), %xmm3
654 movaps 0x2a(%rsi), %xmm4
655 movaps 0x3a(%rsi), %xmm5
656 movaps 0x4a(%rsi), %xmm6
657 movaps 0x5a(%rsi), %xmm7
658 movaps 0x6a(%rsi), %xmm8
659 movaps 0x7a(%rsi), %xmm9
660 lea 0x80(%rsi), %rsi
661 palignr $6, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
662 movaps %xmm9, 0x70(%rdi)
663 palignr $6, %xmm7, %xmm8
664 movaps %xmm8, 0x60(%rdi)
665 palignr $6, %xmm6, %xmm7
666 movaps %xmm7, 0x50(%rdi)
667 palignr $6, %xmm5, %xmm6
668 movaps %xmm6, 0x40(%rdi)
669 palignr $6, %xmm4, %xmm5
670 movaps %xmm5, 0x30(%rdi)
671 palignr $6, %xmm3, %xmm4
672 movaps %xmm4, 0x20(%rdi)
673 palignr $6, %xmm2, %xmm3
674 movaps %xmm3, 0x10(%rdi)
675 palignr $6, %xmm1, %xmm2
676 movaps %xmm2, (%rdi)
677 lea 0x80(%rdi), %rdi
678 jae L(shl_6) /* no borrow: at least 0x80 bytes remain after this pass */
679 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
680 add $0x80, %rdx /* %rdx = number of bytes still to copy */
681 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
682 add %rdx, %rsi
683 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
684
/* Backward 0x80-byte loop for a source end 6 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 6 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 6 (table not shown in
   this excerpt).  */
685 .p2align 4
686 L(shl_6_bwd):
687 movaps -0x06(%rsi), %xmm1 /* aligned block that holds the 6 bytes just below %rsi */
688
689 movaps -0x16(%rsi), %xmm2
690 palignr $6, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
691 movaps %xmm1, -0x10(%rdi)
692
693 movaps -0x26(%rsi), %xmm3
694 palignr $6, %xmm3, %xmm2
695 movaps %xmm2, -0x20(%rdi)
696
697 movaps -0x36(%rsi), %xmm4
698 palignr $6, %xmm4, %xmm3
699 movaps %xmm3, -0x30(%rdi)
700
701 movaps -0x46(%rsi), %xmm5
702 palignr $6, %xmm5, %xmm4
703 movaps %xmm4, -0x40(%rdi)
704
705 movaps -0x56(%rsi), %xmm6
706 palignr $6, %xmm6, %xmm5
707 movaps %xmm5, -0x50(%rdi)
708
709 movaps -0x66(%rsi), %xmm7
710 palignr $6, %xmm7, %xmm6
711 movaps %xmm6, -0x60(%rdi)
712
713 movaps -0x76(%rsi), %xmm8
714 palignr $6, %xmm8, %xmm7
715 movaps %xmm7, -0x70(%rdi)
716
717 movaps -0x86(%rsi), %xmm9
718 palignr $6, %xmm9, %xmm8
719 movaps %xmm8, -0x80(%rdi)
720
721 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
722 lea -0x80(%rdi), %rdi
723 lea -0x80(%rsi), %rsi
724 jae L(shl_6_bwd) /* no borrow: at least 0x80 bytes remain */
725 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
726 add $0x80, %rdx /* %rdx = number of bytes still to copy */
727 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
728 sub %rdx, %rsi
729 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
730
/* Forward 0x80-byte loop for a source 7 bytes past 16-byte alignment.
   The movaps loads need %rsi - 7 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 7 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $7 into one output
   block.  */
731 .p2align 4
732 L(shl_7):
733 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
734 movaps -0x07(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
735 movaps 0x09(%rsi), %xmm2
736 movaps 0x19(%rsi), %xmm3
737 movaps 0x29(%rsi), %xmm4
738 movaps 0x39(%rsi), %xmm5
739 movaps 0x49(%rsi), %xmm6
740 movaps 0x59(%rsi), %xmm7
741 movaps 0x69(%rsi), %xmm8
742 movaps 0x79(%rsi), %xmm9
743 lea 0x80(%rsi), %rsi
744 palignr $7, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
745 movaps %xmm9, 0x70(%rdi)
746 palignr $7, %xmm7, %xmm8
747 movaps %xmm8, 0x60(%rdi)
748 palignr $7, %xmm6, %xmm7
749 movaps %xmm7, 0x50(%rdi)
750 palignr $7, %xmm5, %xmm6
751 movaps %xmm6, 0x40(%rdi)
752 palignr $7, %xmm4, %xmm5
753 movaps %xmm5, 0x30(%rdi)
754 palignr $7, %xmm3, %xmm4
755 movaps %xmm4, 0x20(%rdi)
756 palignr $7, %xmm2, %xmm3
757 movaps %xmm3, 0x10(%rdi)
758 palignr $7, %xmm1, %xmm2
759 movaps %xmm2, (%rdi)
760 lea 0x80(%rdi), %rdi
761 jae L(shl_7) /* no borrow: at least 0x80 bytes remain after this pass */
762 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
763 add $0x80, %rdx /* %rdx = number of bytes still to copy */
764 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
765 add %rdx, %rsi
766 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
767
/* Backward 0x80-byte loop for a source end 7 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 7 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 7 (table not shown in
   this excerpt).  */
768 .p2align 4
769 L(shl_7_bwd):
770 movaps -0x07(%rsi), %xmm1 /* aligned block that holds the 7 bytes just below %rsi */
771
772 movaps -0x17(%rsi), %xmm2
773 palignr $7, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
774 movaps %xmm1, -0x10(%rdi)
775
776 movaps -0x27(%rsi), %xmm3
777 palignr $7, %xmm3, %xmm2
778 movaps %xmm2, -0x20(%rdi)
779
780 movaps -0x37(%rsi), %xmm4
781 palignr $7, %xmm4, %xmm3
782 movaps %xmm3, -0x30(%rdi)
783
784 movaps -0x47(%rsi), %xmm5
785 palignr $7, %xmm5, %xmm4
786 movaps %xmm4, -0x40(%rdi)
787
788 movaps -0x57(%rsi), %xmm6
789 palignr $7, %xmm6, %xmm5
790 movaps %xmm5, -0x50(%rdi)
791
792 movaps -0x67(%rsi), %xmm7
793 palignr $7, %xmm7, %xmm6
794 movaps %xmm6, -0x60(%rdi)
795
796 movaps -0x77(%rsi), %xmm8
797 palignr $7, %xmm8, %xmm7
798 movaps %xmm7, -0x70(%rdi)
799
800 movaps -0x87(%rsi), %xmm9
801 palignr $7, %xmm9, %xmm8
802 movaps %xmm8, -0x80(%rdi)
803
804 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
805 lea -0x80(%rdi), %rdi
806 lea -0x80(%rsi), %rsi
807 jae L(shl_7_bwd) /* no borrow: at least 0x80 bytes remain */
808 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
809 add $0x80, %rdx /* %rdx = number of bytes still to copy */
810 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
811 sub %rdx, %rsi
812 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
813
/* Forward 0x80-byte loop for a source 8 bytes past 16-byte alignment.
   The movaps loads need %rsi - 8 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 8 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $8 into one output
   block.  */
814 .p2align 4
815 L(shl_8):
816 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
817 movaps -0x08(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
818 movaps 0x08(%rsi), %xmm2
819 movaps 0x18(%rsi), %xmm3
820 movaps 0x28(%rsi), %xmm4
821 movaps 0x38(%rsi), %xmm5
822 movaps 0x48(%rsi), %xmm6
823 movaps 0x58(%rsi), %xmm7
824 movaps 0x68(%rsi), %xmm8
825 movaps 0x78(%rsi), %xmm9
826 lea 0x80(%rsi), %rsi
827 palignr $8, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
828 movaps %xmm9, 0x70(%rdi)
829 palignr $8, %xmm7, %xmm8
830 movaps %xmm8, 0x60(%rdi)
831 palignr $8, %xmm6, %xmm7
832 movaps %xmm7, 0x50(%rdi)
833 palignr $8, %xmm5, %xmm6
834 movaps %xmm6, 0x40(%rdi)
835 palignr $8, %xmm4, %xmm5
836 movaps %xmm5, 0x30(%rdi)
837 palignr $8, %xmm3, %xmm4
838 movaps %xmm4, 0x20(%rdi)
839 palignr $8, %xmm2, %xmm3
840 movaps %xmm3, 0x10(%rdi)
841 palignr $8, %xmm1, %xmm2
842 movaps %xmm2, (%rdi)
843 lea 0x80(%rdi), %rdi
844 jae L(shl_8) /* no borrow: at least 0x80 bytes remain after this pass */
845 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
846 add $0x80, %rdx /* %rdx = number of bytes still to copy */
847 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
848 add %rdx, %rsi
849 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
850
/* Backward 0x80-byte loop for a source end 8 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 8 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 8 (table not shown in
   this excerpt).  L(shl_8_end_bwd) marks the loop exit; no reference
   to it appears in this excerpt.  */
851 .p2align 4
852 L(shl_8_bwd):
853 movaps -0x08(%rsi), %xmm1 /* aligned block that holds the 8 bytes just below %rsi */
854
855 movaps -0x18(%rsi), %xmm2
856 palignr $8, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
857 movaps %xmm1, -0x10(%rdi)
858
859 movaps -0x28(%rsi), %xmm3
860 palignr $8, %xmm3, %xmm2
861 movaps %xmm2, -0x20(%rdi)
862
863 movaps -0x38(%rsi), %xmm4
864 palignr $8, %xmm4, %xmm3
865 movaps %xmm3, -0x30(%rdi)
866
867 movaps -0x48(%rsi), %xmm5
868 palignr $8, %xmm5, %xmm4
869 movaps %xmm4, -0x40(%rdi)
870
871 movaps -0x58(%rsi), %xmm6
872 palignr $8, %xmm6, %xmm5
873 movaps %xmm5, -0x50(%rdi)
874
875 movaps -0x68(%rsi), %xmm7
876 palignr $8, %xmm7, %xmm6
877 movaps %xmm6, -0x60(%rdi)
878
879 movaps -0x78(%rsi), %xmm8
880 palignr $8, %xmm8, %xmm7
881 movaps %xmm7, -0x70(%rdi)
882
883 movaps -0x88(%rsi), %xmm9
884 palignr $8, %xmm9, %xmm8
885 movaps %xmm8, -0x80(%rdi)
886
887 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
888 lea -0x80(%rdi), %rdi
889 lea -0x80(%rsi), %rsi
890 jae L(shl_8_bwd) /* no borrow: at least 0x80 bytes remain */
891 L(shl_8_end_bwd):
892 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
893 add $0x80, %rdx /* %rdx = number of bytes still to copy */
894 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
895 sub %rdx, %rsi
896 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
897
/* Forward 0x80-byte loop for a source 9 bytes past 16-byte alignment.
   The movaps loads need %rsi - 9 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 9 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $9 into one output
   block.  */
898 .p2align 4
899 L(shl_9):
900 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
901 movaps -0x09(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
902 movaps 0x07(%rsi), %xmm2
903 movaps 0x17(%rsi), %xmm3
904 movaps 0x27(%rsi), %xmm4
905 movaps 0x37(%rsi), %xmm5
906 movaps 0x47(%rsi), %xmm6
907 movaps 0x57(%rsi), %xmm7
908 movaps 0x67(%rsi), %xmm8
909 movaps 0x77(%rsi), %xmm9
910 lea 0x80(%rsi), %rsi
911 palignr $9, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
912 movaps %xmm9, 0x70(%rdi)
913 palignr $9, %xmm7, %xmm8
914 movaps %xmm8, 0x60(%rdi)
915 palignr $9, %xmm6, %xmm7
916 movaps %xmm7, 0x50(%rdi)
917 palignr $9, %xmm5, %xmm6
918 movaps %xmm6, 0x40(%rdi)
919 palignr $9, %xmm4, %xmm5
920 movaps %xmm5, 0x30(%rdi)
921 palignr $9, %xmm3, %xmm4
922 movaps %xmm4, 0x20(%rdi)
923 palignr $9, %xmm2, %xmm3
924 movaps %xmm3, 0x10(%rdi)
925 palignr $9, %xmm1, %xmm2
926 movaps %xmm2, (%rdi)
927 lea 0x80(%rdi), %rdi
928 jae L(shl_9) /* no borrow: at least 0x80 bytes remain after this pass */
929 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
930 add $0x80, %rdx /* %rdx = number of bytes still to copy */
931 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
932 add %rdx, %rsi
933 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
934
/* Backward 0x80-byte loop for a source end 9 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 9 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 9 (table not shown in
   this excerpt).  */
935 .p2align 4
936 L(shl_9_bwd):
937 movaps -0x09(%rsi), %xmm1 /* aligned block that holds the 9 bytes just below %rsi */
938
939 movaps -0x19(%rsi), %xmm2
940 palignr $9, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
941 movaps %xmm1, -0x10(%rdi)
942
943 movaps -0x29(%rsi), %xmm3
944 palignr $9, %xmm3, %xmm2
945 movaps %xmm2, -0x20(%rdi)
946
947 movaps -0x39(%rsi), %xmm4
948 palignr $9, %xmm4, %xmm3
949 movaps %xmm3, -0x30(%rdi)
950
951 movaps -0x49(%rsi), %xmm5
952 palignr $9, %xmm5, %xmm4
953 movaps %xmm4, -0x40(%rdi)
954
955 movaps -0x59(%rsi), %xmm6
956 palignr $9, %xmm6, %xmm5
957 movaps %xmm5, -0x50(%rdi)
958
959 movaps -0x69(%rsi), %xmm7
960 palignr $9, %xmm7, %xmm6
961 movaps %xmm6, -0x60(%rdi)
962
963 movaps -0x79(%rsi), %xmm8
964 palignr $9, %xmm8, %xmm7
965 movaps %xmm7, -0x70(%rdi)
966
967 movaps -0x89(%rsi), %xmm9
968 palignr $9, %xmm9, %xmm8
969 movaps %xmm8, -0x80(%rdi)
970
971 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
972 lea -0x80(%rdi), %rdi
973 lea -0x80(%rsi), %rsi
974 jae L(shl_9_bwd) /* no borrow: at least 0x80 bytes remain */
975 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
976 add $0x80, %rdx /* %rdx = number of bytes still to copy */
977 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
978 sub %rdx, %rsi
979 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
980
/* Forward 0x80-byte loop for a source 10 bytes past 16-byte alignment.
   The movaps loads need %rsi - 10 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 10 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $10 into one output
   block.  */
981 .p2align 4
982 L(shl_10):
983 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
984 movaps -0x0a(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
985 movaps 0x06(%rsi), %xmm2
986 movaps 0x16(%rsi), %xmm3
987 movaps 0x26(%rsi), %xmm4
988 movaps 0x36(%rsi), %xmm5
989 movaps 0x46(%rsi), %xmm6
990 movaps 0x56(%rsi), %xmm7
991 movaps 0x66(%rsi), %xmm8
992 movaps 0x76(%rsi), %xmm9
993 lea 0x80(%rsi), %rsi
994 palignr $10, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
995 movaps %xmm9, 0x70(%rdi)
996 palignr $10, %xmm7, %xmm8
997 movaps %xmm8, 0x60(%rdi)
998 palignr $10, %xmm6, %xmm7
999 movaps %xmm7, 0x50(%rdi)
1000 palignr $10, %xmm5, %xmm6
1001 movaps %xmm6, 0x40(%rdi)
1002 palignr $10, %xmm4, %xmm5
1003 movaps %xmm5, 0x30(%rdi)
1004 palignr $10, %xmm3, %xmm4
1005 movaps %xmm4, 0x20(%rdi)
1006 palignr $10, %xmm2, %xmm3
1007 movaps %xmm3, 0x10(%rdi)
1008 palignr $10, %xmm1, %xmm2
1009 movaps %xmm2, (%rdi)
1010 lea 0x80(%rdi), %rdi
1011 jae L(shl_10) /* no borrow: at least 0x80 bytes remain after this pass */
1012 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
1013 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1014 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
1015 add %rdx, %rsi
1016 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1017
/* Backward 0x80-byte loop for a source end 10 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 10 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 10 (table not shown
   in this excerpt).  */
1018 .p2align 4
1019 L(shl_10_bwd):
1020 movaps -0x0a(%rsi), %xmm1 /* aligned block that holds the 10 bytes just below %rsi */
1021
1022 movaps -0x1a(%rsi), %xmm2
1023 palignr $10, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
1024 movaps %xmm1, -0x10(%rdi)
1025
1026 movaps -0x2a(%rsi), %xmm3
1027 palignr $10, %xmm3, %xmm2
1028 movaps %xmm2, -0x20(%rdi)
1029
1030 movaps -0x3a(%rsi), %xmm4
1031 palignr $10, %xmm4, %xmm3
1032 movaps %xmm3, -0x30(%rdi)
1033
1034 movaps -0x4a(%rsi), %xmm5
1035 palignr $10, %xmm5, %xmm4
1036 movaps %xmm4, -0x40(%rdi)
1037
1038 movaps -0x5a(%rsi), %xmm6
1039 palignr $10, %xmm6, %xmm5
1040 movaps %xmm5, -0x50(%rdi)
1041
1042 movaps -0x6a(%rsi), %xmm7
1043 palignr $10, %xmm7, %xmm6
1044 movaps %xmm6, -0x60(%rdi)
1045
1046 movaps -0x7a(%rsi), %xmm8
1047 palignr $10, %xmm8, %xmm7
1048 movaps %xmm7, -0x70(%rdi)
1049
1050 movaps -0x8a(%rsi), %xmm9
1051 palignr $10, %xmm9, %xmm8
1052 movaps %xmm8, -0x80(%rdi)
1053
1054 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
1055 lea -0x80(%rdi), %rdi
1056 lea -0x80(%rsi), %rsi
1057 jae L(shl_10_bwd) /* no borrow: at least 0x80 bytes remain */
1058 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
1059 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1060 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
1061 sub %rdx, %rsi
1062 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1063
/* Forward 0x80-byte loop for a source 11 bytes past 16-byte alignment.
   The movaps loads need %rsi - 11 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 11 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $11 into one output
   block.  */
1064 .p2align 4
1065 L(shl_11):
1066 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
1067 movaps -0x0b(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
1068 movaps 0x05(%rsi), %xmm2
1069 movaps 0x15(%rsi), %xmm3
1070 movaps 0x25(%rsi), %xmm4
1071 movaps 0x35(%rsi), %xmm5
1072 movaps 0x45(%rsi), %xmm6
1073 movaps 0x55(%rsi), %xmm7
1074 movaps 0x65(%rsi), %xmm8
1075 movaps 0x75(%rsi), %xmm9
1076 lea 0x80(%rsi), %rsi
1077 palignr $11, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
1078 movaps %xmm9, 0x70(%rdi)
1079 palignr $11, %xmm7, %xmm8
1080 movaps %xmm8, 0x60(%rdi)
1081 palignr $11, %xmm6, %xmm7
1082 movaps %xmm7, 0x50(%rdi)
1083 palignr $11, %xmm5, %xmm6
1084 movaps %xmm6, 0x40(%rdi)
1085 palignr $11, %xmm4, %xmm5
1086 movaps %xmm5, 0x30(%rdi)
1087 palignr $11, %xmm3, %xmm4
1088 movaps %xmm4, 0x20(%rdi)
1089 palignr $11, %xmm2, %xmm3
1090 movaps %xmm3, 0x10(%rdi)
1091 palignr $11, %xmm1, %xmm2
1092 movaps %xmm2, (%rdi)
1093 lea 0x80(%rdi), %rdi
1094 jae L(shl_11) /* no borrow: at least 0x80 bytes remain after this pass */
1095 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
1096 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1097 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
1098 add %rdx, %rsi
1099 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1100
/* Backward 0x80-byte loop for a source end 11 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 11 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 11 (table not shown
   in this excerpt).  */
1101 .p2align 4
1102 L(shl_11_bwd):
1103 movaps -0x0b(%rsi), %xmm1 /* aligned block that holds the 11 bytes just below %rsi */
1104
1105 movaps -0x1b(%rsi), %xmm2
1106 palignr $11, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
1107 movaps %xmm1, -0x10(%rdi)
1108
1109 movaps -0x2b(%rsi), %xmm3
1110 palignr $11, %xmm3, %xmm2
1111 movaps %xmm2, -0x20(%rdi)
1112
1113 movaps -0x3b(%rsi), %xmm4
1114 palignr $11, %xmm4, %xmm3
1115 movaps %xmm3, -0x30(%rdi)
1116
1117 movaps -0x4b(%rsi), %xmm5
1118 palignr $11, %xmm5, %xmm4
1119 movaps %xmm4, -0x40(%rdi)
1120
1121 movaps -0x5b(%rsi), %xmm6
1122 palignr $11, %xmm6, %xmm5
1123 movaps %xmm5, -0x50(%rdi)
1124
1125 movaps -0x6b(%rsi), %xmm7
1126 palignr $11, %xmm7, %xmm6
1127 movaps %xmm6, -0x60(%rdi)
1128
1129 movaps -0x7b(%rsi), %xmm8
1130 palignr $11, %xmm8, %xmm7
1131 movaps %xmm7, -0x70(%rdi)
1132
1133 movaps -0x8b(%rsi), %xmm9
1134 palignr $11, %xmm9, %xmm8
1135 movaps %xmm8, -0x80(%rdi)
1136
1137 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
1138 lea -0x80(%rdi), %rdi
1139 lea -0x80(%rsi), %rsi
1140 jae L(shl_11_bwd) /* no borrow: at least 0x80 bytes remain */
1141 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
1142 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1143 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
1144 sub %rdx, %rsi
1145 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1146
/* Forward 0x80-byte loop for a source 12 bytes past 16-byte alignment.
   The aligned loads need %rsi - 12 to be 16-byte aligned and the
   movaps stores need %rdi to be 16-byte aligned; presumably this label
   is the L(shl_table_fwd) target for (%rsi & 15) == 12 (table not
   shown in this excerpt).  Nine aligned source blocks are read per
   pass and each neighbouring pair is merged with palignr $12 into one
   output block.  The first load is spelled movdqa where the sibling
   loops use movaps; both are aligned 16-byte loads.  */
1147 .p2align 4
1148 L(shl_12):
1149 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
1150 movdqa -0x0c(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
1151 movaps 0x04(%rsi), %xmm2
1152 movaps 0x14(%rsi), %xmm3
1153 movaps 0x24(%rsi), %xmm4
1154 movaps 0x34(%rsi), %xmm5
1155 movaps 0x44(%rsi), %xmm6
1156 movaps 0x54(%rsi), %xmm7
1157 movaps 0x64(%rsi), %xmm8
1158 movaps 0x74(%rsi), %xmm9
1159 lea 0x80(%rsi), %rsi
1160 palignr $12, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
1161 movaps %xmm9, 0x70(%rdi)
1162 palignr $12, %xmm7, %xmm8
1163 movaps %xmm8, 0x60(%rdi)
1164 palignr $12, %xmm6, %xmm7
1165 movaps %xmm7, 0x50(%rdi)
1166 palignr $12, %xmm5, %xmm6
1167 movaps %xmm6, 0x40(%rdi)
1168 palignr $12, %xmm4, %xmm5
1169 movaps %xmm5, 0x30(%rdi)
1170 palignr $12, %xmm3, %xmm4
1171 movaps %xmm4, 0x20(%rdi)
1172 palignr $12, %xmm2, %xmm3
1173 movaps %xmm3, 0x10(%rdi)
1174 palignr $12, %xmm1, %xmm2
1175 movaps %xmm2, (%rdi)
1176
1177 lea 0x80(%rdi), %rdi
1178 jae L(shl_12) /* no borrow: at least 0x80 bytes remain after this pass */
1179 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
1180 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1181 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
1182 add %rdx, %rsi
1183 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1184
/* Backward 0x80-byte loop for a source end 12 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 12 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 12 (table not shown
   in this excerpt).  */
1185 .p2align 4
1186 L(shl_12_bwd):
1187 movaps -0x0c(%rsi), %xmm1 /* aligned block that holds the 12 bytes just below %rsi */
1188
1189 movaps -0x1c(%rsi), %xmm2
1190 palignr $12, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
1191 movaps %xmm1, -0x10(%rdi)
1192
1193 movaps -0x2c(%rsi), %xmm3
1194 palignr $12, %xmm3, %xmm2
1195 movaps %xmm2, -0x20(%rdi)
1196
1197 movaps -0x3c(%rsi), %xmm4
1198 palignr $12, %xmm4, %xmm3
1199 movaps %xmm3, -0x30(%rdi)
1200
1201 movaps -0x4c(%rsi), %xmm5
1202 palignr $12, %xmm5, %xmm4
1203 movaps %xmm4, -0x40(%rdi)
1204
1205 movaps -0x5c(%rsi), %xmm6
1206 palignr $12, %xmm6, %xmm5
1207 movaps %xmm5, -0x50(%rdi)
1208
1209 movaps -0x6c(%rsi), %xmm7
1210 palignr $12, %xmm7, %xmm6
1211 movaps %xmm6, -0x60(%rdi)
1212
1213 movaps -0x7c(%rsi), %xmm8
1214 palignr $12, %xmm8, %xmm7
1215 movaps %xmm7, -0x70(%rdi)
1216
1217 movaps -0x8c(%rsi), %xmm9
1218 palignr $12, %xmm9, %xmm8
1219 movaps %xmm8, -0x80(%rdi)
1220
1221 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
1222 lea -0x80(%rdi), %rdi
1223 lea -0x80(%rsi), %rsi
1224 jae L(shl_12_bwd) /* no borrow: at least 0x80 bytes remain */
1225 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
1226 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1227 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
1228 sub %rdx, %rsi
1229 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1230
/* Forward 0x80-byte loop for a source 13 bytes past 16-byte alignment.
   The movaps loads need %rsi - 13 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned; presumably this label is the
   L(shl_table_fwd) target for (%rsi & 15) == 13 (table not shown in
   this excerpt).  Nine aligned source blocks are read per pass and
   each neighbouring pair is merged with palignr $13 into one output
   block.  */
1231 .p2align 4
1232 L(shl_13):
1233 sub $0x80, %rdx /* CF from this sub drives the jae at the loop end */
1234 movaps -0x0d(%rsi), %xmm1 /* aligned block that holds the byte at (%rsi) */
1235 movaps 0x03(%rsi), %xmm2
1236 movaps 0x13(%rsi), %xmm3
1237 movaps 0x23(%rsi), %xmm4
1238 movaps 0x33(%rsi), %xmm5
1239 movaps 0x43(%rsi), %xmm6
1240 movaps 0x53(%rsi), %xmm7
1241 movaps 0x63(%rsi), %xmm8
1242 movaps 0x73(%rsi), %xmm9
1243 lea 0x80(%rsi), %rsi
1244 palignr $13, %xmm8, %xmm9 /* %xmm9 = the 16 source bytes that belong at 0x70(%rdi) */
1245 movaps %xmm9, 0x70(%rdi)
1246 palignr $13, %xmm7, %xmm8
1247 movaps %xmm8, 0x60(%rdi)
1248 palignr $13, %xmm6, %xmm7
1249 movaps %xmm7, 0x50(%rdi)
1250 palignr $13, %xmm5, %xmm6
1251 movaps %xmm6, 0x40(%rdi)
1252 palignr $13, %xmm4, %xmm5
1253 movaps %xmm5, 0x30(%rdi)
1254 palignr $13, %xmm3, %xmm4
1255 movaps %xmm4, 0x20(%rdi)
1256 palignr $13, %xmm2, %xmm3
1257 movaps %xmm3, 0x10(%rdi)
1258 palignr $13, %xmm1, %xmm2
1259 movaps %xmm2, (%rdi)
1260 lea 0x80(%rdi), %rdi
1261 jae L(shl_13) /* no borrow: at least 0x80 bytes remain after this pass */
1262 movdqu %xmm0, (%r8) /* write the 16 head bytes saved in L(144bytesormore) */
1263 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1264 add %rdx, %rdi /* advance both pointers to the end of the remaining bytes */
1265 add %rdx, %rsi
1266 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1267
/* Backward 0x80-byte loop for a source end 13 bytes past 16-byte
   alignment.  %rsi and %rdi point just past the bytes still to copy;
   the movaps loads need %rsi - 13 to be 16-byte aligned and the movaps
   stores need %rdi to be 16-byte aligned.  Presumably this label is
   the L(shl_table_bwd) target for (%rsi & 15) == 13 (table not shown
   in this excerpt).  */
1268 .p2align 4
1269 L(shl_13_bwd):
1270 movaps -0x0d(%rsi), %xmm1 /* aligned block that holds the 13 bytes just below %rsi */
1271
1272 movaps -0x1d(%rsi), %xmm2
1273 palignr $13, %xmm2, %xmm1 /* %xmm1 = the 16 source bytes that belong at -0x10(%rdi) */
1274 movaps %xmm1, -0x10(%rdi)
1275
1276 movaps -0x2d(%rsi), %xmm3
1277 palignr $13, %xmm3, %xmm2
1278 movaps %xmm2, -0x20(%rdi)
1279
1280 movaps -0x3d(%rsi), %xmm4
1281 palignr $13, %xmm4, %xmm3
1282 movaps %xmm3, -0x30(%rdi)
1283
1284 movaps -0x4d(%rsi), %xmm5
1285 palignr $13, %xmm5, %xmm4
1286 movaps %xmm4, -0x40(%rdi)
1287
1288 movaps -0x5d(%rsi), %xmm6
1289 palignr $13, %xmm6, %xmm5
1290 movaps %xmm5, -0x50(%rdi)
1291
1292 movaps -0x6d(%rsi), %xmm7
1293 palignr $13, %xmm7, %xmm6
1294 movaps %xmm6, -0x60(%rdi)
1295
1296 movaps -0x7d(%rsi), %xmm8
1297 palignr $13, %xmm8, %xmm7
1298 movaps %xmm7, -0x70(%rdi)
1299
1300 movaps -0x8d(%rsi), %xmm9
1301 palignr $13, %xmm9, %xmm8
1302 movaps %xmm8, -0x80(%rdi)
1303
1304 sub $0x80, %rdx /* CF from this sub drives the jae below (lea leaves flags alone) */
1305 lea -0x80(%rdi), %rdi
1306 lea -0x80(%rsi), %rsi
1307 jae L(shl_13_bwd) /* no borrow: at least 0x80 bytes remain */
1308 movdqu %xmm0, (%r8) /* write the last 16 bytes saved in L(copy_backward) */
1309 add $0x80, %rdx /* %rdx = number of bytes still to copy */
1310 sub %rdx, %rdi /* step both pointers back to the start of the remaining bytes */
1311 sub %rdx, %rsi
1312 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1313
/* L(shl_14): forward 0x80-bytes-per-pass copy loop for a source that
   sits 14 bytes past a 16-byte boundary.  Every load and store is
   movaps, which requires a 16-byte-aligned address, so (%rsi - 14) and
   %rdi must both be 16-byte aligned here.
   Each pass loads nine aligned vectors covering [%rsi-0x0e, %rsi+0x82),
   merges each adjacent pair with palignr $14 and stores the eight
   results to [%rdi, %rdi+0x80).  The loop repeats while the
   subtraction of 0x80 from %rdx does not borrow.
   xmm0 and %r8 are set up before this label, outside this view.  */
1314 .p2align 4
1315 L(shl_14):
/* The flags from this sub feed the jae at the bottom; movaps, palignr
   and lea in between do not modify flags.  */
1316 sub $0x80, %rdx
1317 movaps -0x0e(%rsi), %xmm1
1318 movaps 0x02(%rsi), %xmm2
1319 movaps 0x12(%rsi), %xmm3
1320 movaps 0x22(%rsi), %xmm4
1321 movaps 0x32(%rsi), %xmm5
1322 movaps 0x42(%rsi), %xmm6
1323 movaps 0x52(%rsi), %xmm7
1324 movaps 0x62(%rsi), %xmm8
1325 movaps 0x72(%rsi), %xmm9
1326 lea 0x80(%rsi), %rsi
/* palignr $14, lo, hi: hi = bytes 14..29 of the 32-byte pair hi:lo.
   Pairs are processed from the highest address down so each register
   is still intact when it is used as the low half.  */
1327 palignr $14, %xmm8, %xmm9
1328 movaps %xmm9, 0x70(%rdi)
1329 palignr $14, %xmm7, %xmm8
1330 movaps %xmm8, 0x60(%rdi)
1331 palignr $14, %xmm6, %xmm7
1332 movaps %xmm7, 0x50(%rdi)
1333 palignr $14, %xmm5, %xmm6
1334 movaps %xmm6, 0x40(%rdi)
1335 palignr $14, %xmm4, %xmm5
1336 movaps %xmm5, 0x30(%rdi)
1337 palignr $14, %xmm3, %xmm4
1338 movaps %xmm4, 0x20(%rdi)
1339 palignr $14, %xmm2, %xmm3
1340 movaps %xmm3, 0x10(%rdi)
1341 palignr $14, %xmm1, %xmm2
1342 movaps %xmm2, (%rdi)
1343 lea 0x80(%rdi), %rdi
1344 jae L(shl_14)
/* Store the vector kept in xmm0 to the address kept in r8 (set up
   outside this view).  */
1345 movdqu %xmm0, (%r8)
/* Undo the last subtraction, then move both pointers forward by the
   leftover count before dispatching on it.  Presumably the table
   entries are the L(fwd_write_Nbytes) tails.  The macro clobbers %r11
   and %rdx.  */
1346 add $0x80, %rdx
1347 add %rdx, %rdi
1348 add %rdx, %rsi
1349 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1350
/* L(shl_14_bwd): descending-address 0x80-bytes-per-pass copy loop for a
   source that sits 14 bytes past a 16-byte boundary.  All accesses are
   movaps, so (%rsi - 14) and %rdi must be 16-byte aligned.
   Each pass loads nine aligned vectors covering [%rsi-0x8e, %rsi+0x02),
   merges adjacent pairs with palignr $14 and stores the 0x80 bytes that
   end at %rdi, highest vector first.
   xmm0 and %r8 are set up before this label, outside this view.  */
1351 .p2align 4
1352 L(shl_14_bwd):
1353 movaps -0x0e(%rsi), %xmm1
1354
/* palignr $14, lo, hi: hi = bytes 14..29 of the 32-byte pair hi:lo.  */
1355 movaps -0x1e(%rsi), %xmm2
1356 palignr $14, %xmm2, %xmm1
1357 movaps %xmm1, -0x10(%rdi)
1358
1359 movaps -0x2e(%rsi), %xmm3
1360 palignr $14, %xmm3, %xmm2
1361 movaps %xmm2, -0x20(%rdi)
1362
1363 movaps -0x3e(%rsi), %xmm4
1364 palignr $14, %xmm4, %xmm3
1365 movaps %xmm3, -0x30(%rdi)
1366
1367 movaps -0x4e(%rsi), %xmm5
1368 palignr $14, %xmm5, %xmm4
1369 movaps %xmm4, -0x40(%rdi)
1370
1371 movaps -0x5e(%rsi), %xmm6
1372 palignr $14, %xmm6, %xmm5
1373 movaps %xmm5, -0x50(%rdi)
1374
1375 movaps -0x6e(%rsi), %xmm7
1376 palignr $14, %xmm7, %xmm6
1377 movaps %xmm6, -0x60(%rdi)
1378
1379 movaps -0x7e(%rsi), %xmm8
1380 palignr $14, %xmm8, %xmm7
1381 movaps %xmm7, -0x70(%rdi)
1382
1383 movaps -0x8e(%rsi), %xmm9
1384 palignr $14, %xmm9, %xmm8
1385 movaps %xmm8, -0x80(%rdi)
1386
/* The two lea instructions leave the flags from sub intact for jae:
   loop again unless the subtraction borrowed.  */
1387 sub $0x80, %rdx
1388 lea -0x80(%rdi), %rdi
1389 lea -0x80(%rsi), %rsi
1390 jae L(shl_14_bwd)
/* Store the vector kept in xmm0 to the address kept in r8 (set up
   outside this view).  */
1391 movdqu %xmm0, (%r8)
/* Undo the last subtraction and move both pointers down by the
   leftover count before dispatching on it.  Presumably the table
   entries are the L(bwd_write_Nbytes) tails.  The macro clobbers %r11
   and %rdx.  */
1392 add $0x80, %rdx
1393 sub %rdx, %rdi
1394 sub %rdx, %rsi
1395 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1396
/* L(shl_15): forward 0x80-bytes-per-pass copy loop for a source that
   sits 15 bytes past a 16-byte boundary.  Every load and store is
   movaps, which requires a 16-byte-aligned address, so (%rsi - 15) and
   %rdi must both be 16-byte aligned here.
   Each pass loads nine aligned vectors covering [%rsi-0x0f, %rsi+0x81),
   merges each adjacent pair with palignr $15 and stores the eight
   results to [%rdi, %rdi+0x80).  The loop repeats while the
   subtraction of 0x80 from %rdx does not borrow.
   xmm0 and %r8 are set up before this label, outside this view.  */
1397 .p2align 4
1398 L(shl_15):
/* The flags from this sub feed the jae at the bottom; movaps, palignr
   and lea in between do not modify flags.  */
1399 sub $0x80, %rdx
1400 movaps -0x0f(%rsi), %xmm1
1401 movaps 0x01(%rsi), %xmm2
1402 movaps 0x11(%rsi), %xmm3
1403 movaps 0x21(%rsi), %xmm4
1404 movaps 0x31(%rsi), %xmm5
1405 movaps 0x41(%rsi), %xmm6
1406 movaps 0x51(%rsi), %xmm7
1407 movaps 0x61(%rsi), %xmm8
1408 movaps 0x71(%rsi), %xmm9
1409 lea 0x80(%rsi), %rsi
/* palignr $15, lo, hi: hi = bytes 15..30 of the 32-byte pair hi:lo.
   Pairs are processed from the highest address down so each register
   is still intact when it is used as the low half.  */
1410 palignr $15, %xmm8, %xmm9
1411 movaps %xmm9, 0x70(%rdi)
1412 palignr $15, %xmm7, %xmm8
1413 movaps %xmm8, 0x60(%rdi)
1414 palignr $15, %xmm6, %xmm7
1415 movaps %xmm7, 0x50(%rdi)
1416 palignr $15, %xmm5, %xmm6
1417 movaps %xmm6, 0x40(%rdi)
1418 palignr $15, %xmm4, %xmm5
1419 movaps %xmm5, 0x30(%rdi)
1420 palignr $15, %xmm3, %xmm4
1421 movaps %xmm4, 0x20(%rdi)
1422 palignr $15, %xmm2, %xmm3
1423 movaps %xmm3, 0x10(%rdi)
1424 palignr $15, %xmm1, %xmm2
1425 movaps %xmm2, (%rdi)
1426 lea 0x80(%rdi), %rdi
1427 jae L(shl_15)
/* Store the vector kept in xmm0 to the address kept in r8 (set up
   outside this view).  */
1428 movdqu %xmm0, (%r8)
/* Undo the last subtraction, then move both pointers forward by the
   leftover count before dispatching on it.  Presumably the table
   entries are the L(fwd_write_Nbytes) tails.  The macro clobbers %r11
   and %rdx.  */
1429 add $0x80, %rdx
1430 add %rdx, %rdi
1431 add %rdx, %rsi
1432 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1433
/* L(shl_15_bwd): descending-address 0x80-bytes-per-pass copy loop for a
   source that sits 15 bytes past a 16-byte boundary.  All accesses are
   movaps, so (%rsi - 15) and %rdi must be 16-byte aligned.
   Each pass loads nine aligned vectors covering [%rsi-0x8f, %rsi+0x01),
   merges adjacent pairs with palignr $15 and stores the 0x80 bytes that
   end at %rdi, highest vector first.
   xmm0 and %r8 are set up before this label, outside this view.  */
1434 .p2align 4
1435 L(shl_15_bwd):
1436 movaps -0x0f(%rsi), %xmm1
1437
/* palignr $15, lo, hi: hi = bytes 15..30 of the 32-byte pair hi:lo.  */
1438 movaps -0x1f(%rsi), %xmm2
1439 palignr $15, %xmm2, %xmm1
1440 movaps %xmm1, -0x10(%rdi)
1441
1442 movaps -0x2f(%rsi), %xmm3
1443 palignr $15, %xmm3, %xmm2
1444 movaps %xmm2, -0x20(%rdi)
1445
1446 movaps -0x3f(%rsi), %xmm4
1447 palignr $15, %xmm4, %xmm3
1448 movaps %xmm3, -0x30(%rdi)
1449
1450 movaps -0x4f(%rsi), %xmm5
1451 palignr $15, %xmm5, %xmm4
1452 movaps %xmm4, -0x40(%rdi)
1453
1454 movaps -0x5f(%rsi), %xmm6
1455 palignr $15, %xmm6, %xmm5
1456 movaps %xmm5, -0x50(%rdi)
1457
1458 movaps -0x6f(%rsi), %xmm7
1459 palignr $15, %xmm7, %xmm6
1460 movaps %xmm6, -0x60(%rdi)
1461
1462 movaps -0x7f(%rsi), %xmm8
1463 palignr $15, %xmm8, %xmm7
1464 movaps %xmm7, -0x70(%rdi)
1465
1466 movaps -0x8f(%rsi), %xmm9
1467 palignr $15, %xmm9, %xmm8
1468 movaps %xmm8, -0x80(%rdi)
1469
/* The two lea instructions leave the flags from sub intact for jae:
   loop again unless the subtraction borrowed.  */
1470 sub $0x80, %rdx
1471 lea -0x80(%rdi), %rdi
1472 lea -0x80(%rsi), %rsi
1473 jae L(shl_15_bwd)
/* Store the vector kept in xmm0 to the address kept in r8 (set up
   outside this view).  */
1474 movdqu %xmm0, (%r8)
/* Undo the last subtraction and move both pointers down by the
   leftover count before dispatching on it.  Presumably the table
   entries are the L(bwd_write_Nbytes) tails.  The macro clobbers %r11
   and %rdx.  */
1475 add $0x80, %rdx
1476 sub %rdx, %rdi
1477 sub %rdx, %rsi
1478 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1479
/* L(gobble_mem_fwd): forward bulk copy that chooses between a
   non-temporal store loop (movntdq) and an ordinary store loop (movdqa)
   by comparing the length in %rdx with half the shared cache size.
   In:  %rsi = source, %rdi = destination (must be 16-byte aligned,
        since movdqa/movntdq store to it), %rdx = byte count,
        xmm0/%r8 = vector and address set up outside this view.
   Out: jumps through L(table_144_bytes_fwd) with %rdx = leftover count
        and %rsi/%rdi advanced past the leftover bytes.
   Clobbers %rcx, %r9, %r11, xmm0-xmm7 and flags.
   NOTE(review): both loops assume at least 0x80 bytes are left when
   they are entered; the checks that guarantee this are not visible
   here.  */
1480 .p2align 4
1481 L(gobble_mem_fwd):
/* Copy the first 16 bytes and store the pending xmm0 vector now,
   because xmm0 is reused as a data register by the loops below.  */
1482 movdqu (%rsi), %xmm1
1483 movdqu %xmm0, (%r8)
1484 movdqa %xmm1, (%rdi)
1485 sub $16, %rdx
1486 add $16, %rsi
1487 add $16, %rdi
1488
/* rcx = half of the shared cache size: a build-time constant if
   SHARED_CACHE_SIZE_HALF is defined, otherwise read from
   __x86_shared_cache_size_half.  */
1489 #ifdef SHARED_CACHE_SIZE_HALF
1490 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1491 #else
1492 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1493 #endif
/* memmove only: r9 = rsi - rdi.  If r9 >= rdx (unsigned) continue with
   the size-based choice below; otherwise, if r9 <= rcx, go straight to
   the movdqa loop and never use the non-temporal loop.  */
1494 #ifdef USE_AS_MEMMOVE
1495 mov %rsi, %r9
1496 sub %rdi, %r9
1497 cmp %rdx, %r9
1498 jae L(memmove_is_memcpy_fwd)
1499 cmp %rcx, %r9
1500 jbe L(ll_cache_copy_fwd_start)
1501 L(memmove_is_memcpy_fwd):
1502 #endif
/* rcx = min(rdx, rcx); rdx = the bytes in excess of rcx.  */
1503 cmp %rcx, %rdx
1504 ja L(bigger_in_fwd)
1505 mov %rdx, %rcx
1506 L(bigger_in_fwd):
1507 sub %rcx, %rdx
/* An excess of at most 0x1000 bytes: copy everything with movdqa.  */
1508 cmp $0x1000, %rdx
1509 jbe L(ll_cache_copy_fwd)
1510
/* If the excess is more than 8 * rcx, fold rcx back into rdx and clear
   rcx so the movntdq loop copies it all.  Otherwise copy rdx bytes
   with movntdq and then the last rcx bytes with movdqa.  */
1511 mov %rcx, %r9
1512 shl $3, %r9
1513 cmp %r9, %rdx
1514 jbe L(2steps_copy_fwd)
1515 add %rcx, %rdx
1516 xor %rcx, %rcx
1517 L(2steps_copy_fwd):
/* Bias rdx by -0x80 so the sub at the top of the loop borrows on the
   pass after which fewer than 0x80 bytes remain.  */
1518 sub $0x80, %rdx
1519 L(gobble_mem_fwd_loop):
/* The flags from this sub feed the jae at the bottom; prefetch, moves,
   lfence and lea do not modify flags.  */
1520 sub $0x80, %rdx
1521 prefetcht0 0x200(%rsi)
1522 prefetcht0 0x300(%rsi)
1523 movdqu (%rsi), %xmm0
1524 movdqu 0x10(%rsi), %xmm1
1525 movdqu 0x20(%rsi), %xmm2
1526 movdqu 0x30(%rsi), %xmm3
1527 movdqu 0x40(%rsi), %xmm4
1528 movdqu 0x50(%rsi), %xmm5
1529 movdqu 0x60(%rsi), %xmm6
1530 movdqu 0x70(%rsi), %xmm7
/* lfence separates the eight loads from the eight non-temporal
   stores.  */
1531 lfence
1532 movntdq %xmm0, (%rdi)
1533 movntdq %xmm1, 0x10(%rdi)
1534 movntdq %xmm2, 0x20(%rdi)
1535 movntdq %xmm3, 0x30(%rdi)
1536 movntdq %xmm4, 0x40(%rdi)
1537 movntdq %xmm5, 0x50(%rdi)
1538 movntdq %xmm6, 0x60(%rdi)
1539 movntdq %xmm7, 0x70(%rdi)
1540 lea 0x80(%rsi), %rsi
1541 lea 0x80(%rdi), %rdi
1542 jae L(gobble_mem_fwd_loop)
/* Order the non-temporal stores before any later stores.  */
1543 sfence
/* Fewer than 0x80 bytes set aside for the movdqa loop (for example
   rcx was cleared above): skip it.  */
1544 cmp $0x80, %rcx
1545 jb L(gobble_mem_fwd_end)
/* Remove the bias, then add the bytes set aside in rcx.  */
1546 add $0x80, %rdx
1547 L(ll_cache_copy_fwd):
1548 add %rcx, %rdx
1549 L(ll_cache_copy_fwd_start):
/* Same -0x80 bias as for the non-temporal loop.  */
1550 sub $0x80, %rdx
1551 L(gobble_ll_loop_fwd):
1552 prefetchnta 0x1c0(%rsi)
1553 prefetchnta 0x280(%rsi)
1554 prefetchnta 0x1c0(%rdi)
1555 prefetchnta 0x280(%rdi)
1556 sub $0x80, %rdx
/* All eight loads are done before the first store.  */
1557 movdqu (%rsi), %xmm0
1558 movdqu 0x10(%rsi), %xmm1
1559 movdqu 0x20(%rsi), %xmm2
1560 movdqu 0x30(%rsi), %xmm3
1561 movdqu 0x40(%rsi), %xmm4
1562 movdqu 0x50(%rsi), %xmm5
1563 movdqu 0x60(%rsi), %xmm6
1564 movdqu 0x70(%rsi), %xmm7
1565 movdqa %xmm0, (%rdi)
1566 movdqa %xmm1, 0x10(%rdi)
1567 movdqa %xmm2, 0x20(%rdi)
1568 movdqa %xmm3, 0x30(%rdi)
1569 movdqa %xmm4, 0x40(%rdi)
1570 movdqa %xmm5, 0x50(%rdi)
1571 movdqa %xmm6, 0x60(%rdi)
1572 movdqa %xmm7, 0x70(%rdi)
1573 lea 0x80(%rsi), %rsi
1574 lea 0x80(%rdi), %rdi
1575 jae L(gobble_ll_loop_fwd)
1576 L(gobble_mem_fwd_end):
/* Remove the bias so rdx is the leftover count, move both pointers
   past the leftover bytes and dispatch on the count.  Presumably the
   table entries are the L(fwd_write_Nbytes) tails.  */
1577 add $0x80, %rdx
1578 add %rdx, %rsi
1579 add %rdx, %rdi
1580 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1581
/* L(gobble_mem_bwd): descending-address bulk copy that chooses between
   a non-temporal store loop (movntdq) and an ordinary store loop
   (movdqa) by comparing the length with half the shared cache size.
   In:  %rsi = source start, %rdi = destination start, %rdx = byte
        count.
   Out: jumps through L(table_144_bytes_bwd) with %rdx = leftover count
        and %rsi/%rdi pointing at the start of the leftover bytes.
   Clobbers %rcx, %r8, %r9, %r11, xmm0-xmm8 and flags.
   NOTE(review): both loops assume at least 0x80 bytes are left when
   they are entered; the checks that guarantee this are not visible
   here.  */
1582 .p2align 4
1583 L(gobble_mem_bwd):
/* Point both registers one past the end of their buffers.  */
1584 add %rdx, %rsi
1585 add %rdx, %rdi
1586
/* Keep the last 16 source bytes in xmm0 and their destination address
   in r8; they are stored at L(gobble_mem_bwd_end).  Then round rdi
   down to a 16-byte boundary and move rsi and rdx back by the same
   0..15 bytes (r9), so the loops can use aligned stores.  */
1587 movdqu -16(%rsi), %xmm0
1588 lea -16(%rdi), %r8
1589 mov %rdi, %r9
1590 and $-16, %rdi
1591 sub %rdi, %r9
1592 sub %r9, %rsi
1593 sub %r9, %rdx
1594
1595
/* rcx = half of the shared cache size: a build-time constant if
   SHARED_CACHE_SIZE_HALF is defined, otherwise read from
   __x86_shared_cache_size_half.  */
1596 #ifdef SHARED_CACHE_SIZE_HALF
1597 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1598 #else
1599 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1600 #endif
/* memmove only: r9 = rdi - rsi.  If r9 >= rdx (unsigned) continue with
   the size-based choice below; otherwise, if r9 <= rcx, go straight to
   the movdqa loop and never use the non-temporal loop.  */
1601 #ifdef USE_AS_MEMMOVE
1602 mov %rdi, %r9
1603 sub %rsi, %r9
1604 cmp %rdx, %r9
1605 jae L(memmove_is_memcpy_bwd)
1606 cmp %rcx, %r9
1607 jbe L(ll_cache_copy_bwd_start)
1608 L(memmove_is_memcpy_bwd):
1609 #endif
/* rcx = min(rdx, rcx); rdx = the bytes in excess of rcx.  */
1610 cmp %rcx, %rdx
1611 ja L(bigger)
1612 mov %rdx, %rcx
1613 L(bigger):
1614 sub %rcx, %rdx
/* An excess of at most 0x1000 bytes: copy everything with movdqa.  */
1615 cmp $0x1000, %rdx
1616 jbe L(ll_cache_copy)
1617
/* If the excess is more than 8 * rcx, fold rcx back into rdx and clear
   rcx so the movntdq loop copies it all.  Otherwise copy rdx bytes
   with movntdq and then the last rcx bytes with movdqa.  */
1618 mov %rcx, %r9
1619 shl $3, %r9
1620 cmp %r9, %rdx
1621 jbe L(2steps_copy)
1622 add %rcx, %rdx
1623 xor %rcx, %rcx
1624 L(2steps_copy):
/* Bias rdx by -0x80 so the sub at the top of the loop borrows on the
   pass after which fewer than 0x80 bytes remain.  */
1625 sub $0x80, %rdx
1626 L(gobble_mem_bwd_loop):
/* The flags from this sub feed the jae at the bottom; prefetch, moves,
   lfence and lea do not modify flags.  xmm0 is deliberately not used
   as a data register here because it still holds the saved tail.  */
1627 sub $0x80, %rdx
1628 prefetcht0 -0x200(%rsi)
1629 prefetcht0 -0x300(%rsi)
1630 movdqu -0x10(%rsi), %xmm1
1631 movdqu -0x20(%rsi), %xmm2
1632 movdqu -0x30(%rsi), %xmm3
1633 movdqu -0x40(%rsi), %xmm4
1634 movdqu -0x50(%rsi), %xmm5
1635 movdqu -0x60(%rsi), %xmm6
1636 movdqu -0x70(%rsi), %xmm7
1637 movdqu -0x80(%rsi), %xmm8
/* lfence separates the eight loads from the eight non-temporal
   stores.  */
1638 lfence
1639 movntdq %xmm1, -0x10(%rdi)
1640 movntdq %xmm2, -0x20(%rdi)
1641 movntdq %xmm3, -0x30(%rdi)
1642 movntdq %xmm4, -0x40(%rdi)
1643 movntdq %xmm5, -0x50(%rdi)
1644 movntdq %xmm6, -0x60(%rdi)
1645 movntdq %xmm7, -0x70(%rdi)
1646 movntdq %xmm8, -0x80(%rdi)
1647 lea -0x80(%rsi), %rsi
1648 lea -0x80(%rdi), %rdi
1649 jae L(gobble_mem_bwd_loop)
/* Order the non-temporal stores before any later stores.  */
1650 sfence
/* Fewer than 0x80 bytes set aside for the movdqa loop (for example
   rcx was cleared above): skip it.  */
1651 cmp $0x80, %rcx
1652 jb L(gobble_mem_bwd_end)
/* Remove the bias, then add the bytes set aside in rcx.  */
1653 add $0x80, %rdx
1654 L(ll_cache_copy):
1655 add %rcx, %rdx
1656 L(ll_cache_copy_bwd_start):
/* Same -0x80 bias as for the non-temporal loop.  */
1657 sub $0x80, %rdx
1658 L(gobble_ll_loop):
1659 prefetchnta -0x1c0(%rsi)
1660 prefetchnta -0x280(%rsi)
1661 prefetchnta -0x1c0(%rdi)
1662 prefetchnta -0x280(%rdi)
1663 sub $0x80, %rdx
/* All eight loads are done before the first store.  */
1664 movdqu -0x10(%rsi), %xmm1
1665 movdqu -0x20(%rsi), %xmm2
1666 movdqu -0x30(%rsi), %xmm3
1667 movdqu -0x40(%rsi), %xmm4
1668 movdqu -0x50(%rsi), %xmm5
1669 movdqu -0x60(%rsi), %xmm6
1670 movdqu -0x70(%rsi), %xmm7
1671 movdqu -0x80(%rsi), %xmm8
1672 movdqa %xmm1, -0x10(%rdi)
1673 movdqa %xmm2, -0x20(%rdi)
1674 movdqa %xmm3, -0x30(%rdi)
1675 movdqa %xmm4, -0x40(%rdi)
1676 movdqa %xmm5, -0x50(%rdi)
1677 movdqa %xmm6, -0x60(%rdi)
1678 movdqa %xmm7, -0x70(%rdi)
1679 movdqa %xmm8, -0x80(%rdi)
1680 lea -0x80(%rsi), %rsi
1681 lea -0x80(%rdi), %rdi
1682 jae L(gobble_ll_loop)
1683 L(gobble_mem_bwd_end):
/* Write the saved last 16 bytes, remove the bias so rdx is the
   leftover count, move both pointers down to the start of the leftover
   bytes and dispatch on the count.  Presumably the table entries are
   the L(bwd_write_Nbytes) tails.  */
1684 movdqu %xmm0, (%r8)
1685 add $0x80, %rdx
1686 sub %rdx, %rsi
1687 sub %rdx, %rdi
1688 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1689
/* Forward tails for N = 128, 112, ..., 16, 0.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 (unaligned load and store) and falls through to the
   label for N-16.  L(fwd_write_0bytes) only returns.  */
1690 .p2align 4
1691 L(fwd_write_128bytes):
1692 lddqu -128(%rsi), %xmm0
1693 movdqu %xmm0, -128(%rdi)
1694 L(fwd_write_112bytes):
1695 lddqu -112(%rsi), %xmm0
1696 movdqu %xmm0, -112(%rdi)
1697 L(fwd_write_96bytes):
1698 lddqu -96(%rsi), %xmm0
1699 movdqu %xmm0, -96(%rdi)
1700 L(fwd_write_80bytes):
1701 lddqu -80(%rsi), %xmm0
1702 movdqu %xmm0, -80(%rdi)
1703 L(fwd_write_64bytes):
1704 lddqu -64(%rsi), %xmm0
1705 movdqu %xmm0, -64(%rdi)
1706 L(fwd_write_48bytes):
1707 lddqu -48(%rsi), %xmm0
1708 movdqu %xmm0, -48(%rdi)
1709 L(fwd_write_32bytes):
1710 lddqu -32(%rsi), %xmm0
1711 movdqu %xmm0, -32(%rdi)
1712 L(fwd_write_16bytes):
1713 lddqu -16(%rsi), %xmm0
1714 movdqu %xmm0, -16(%rdi)
1715 L(fwd_write_0bytes):
1716 ret
1717
1718
/* Forward tails for N = 143, 127, ..., 31.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 31
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1719 .p2align 4
1720 L(fwd_write_143bytes):
1721 lddqu -143(%rsi), %xmm0
1722 movdqu %xmm0, -143(%rdi)
1723 L(fwd_write_127bytes):
1724 lddqu -127(%rsi), %xmm0
1725 movdqu %xmm0, -127(%rdi)
1726 L(fwd_write_111bytes):
1727 lddqu -111(%rsi), %xmm0
1728 movdqu %xmm0, -111(%rdi)
1729 L(fwd_write_95bytes):
1730 lddqu -95(%rsi), %xmm0
1731 movdqu %xmm0, -95(%rdi)
1732 L(fwd_write_79bytes):
1733 lddqu -79(%rsi), %xmm0
1734 movdqu %xmm0, -79(%rdi)
1735 L(fwd_write_63bytes):
1736 lddqu -63(%rsi), %xmm0
1737 movdqu %xmm0, -63(%rdi)
1738 L(fwd_write_47bytes):
1739 lddqu -47(%rsi), %xmm0
1740 movdqu %xmm0, -47(%rdi)
1741 L(fwd_write_31bytes):
1742 lddqu -31(%rsi), %xmm0
1743 lddqu -16(%rsi), %xmm1
1744 movdqu %xmm0, -31(%rdi)
1745 movdqu %xmm1, -16(%rdi)
1746 ret
1747
/* Copy the 15 bytes that end at %rsi to the 15 bytes that end at %rdi
   using two overlapping 8-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
1748 .p2align 4
1749 L(fwd_write_15bytes):
1750 mov -15(%rsi), %rdx
1751 mov -8(%rsi), %rcx
1752 mov %rdx, -15(%rdi)
1753 mov %rcx, -8(%rdi)
1754 ret
1755
/* Forward tails for N = 142, 126, ..., 30.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 30
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1756 .p2align 4
1757 L(fwd_write_142bytes):
1758 lddqu -142(%rsi), %xmm0
1759 movdqu %xmm0, -142(%rdi)
1760 L(fwd_write_126bytes):
1761 lddqu -126(%rsi), %xmm0
1762 movdqu %xmm0, -126(%rdi)
1763 L(fwd_write_110bytes):
1764 lddqu -110(%rsi), %xmm0
1765 movdqu %xmm0, -110(%rdi)
1766 L(fwd_write_94bytes):
1767 lddqu -94(%rsi), %xmm0
1768 movdqu %xmm0, -94(%rdi)
1769 L(fwd_write_78bytes):
1770 lddqu -78(%rsi), %xmm0
1771 movdqu %xmm0, -78(%rdi)
1772 L(fwd_write_62bytes):
1773 lddqu -62(%rsi), %xmm0
1774 movdqu %xmm0, -62(%rdi)
1775 L(fwd_write_46bytes):
1776 lddqu -46(%rsi), %xmm0
1777 movdqu %xmm0, -46(%rdi)
1778 L(fwd_write_30bytes):
1779 lddqu -30(%rsi), %xmm0
1780 lddqu -16(%rsi), %xmm1
1781 movdqu %xmm0, -30(%rdi)
1782 movdqu %xmm1, -16(%rdi)
1783 ret
1784
/* Copy the 14 bytes that end at %rsi to the 14 bytes that end at %rdi
   using two overlapping 8-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
1785 .p2align 4
1786 L(fwd_write_14bytes):
1787 mov -14(%rsi), %rdx
1788 mov -8(%rsi), %rcx
1789 mov %rdx, -14(%rdi)
1790 mov %rcx, -8(%rdi)
1791 ret
1792
/* Forward tails for N = 141, 125, ..., 29.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 29
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1793 .p2align 4
1794 L(fwd_write_141bytes):
1795 lddqu -141(%rsi), %xmm0
1796 movdqu %xmm0, -141(%rdi)
1797 L(fwd_write_125bytes):
1798 lddqu -125(%rsi), %xmm0
1799 movdqu %xmm0, -125(%rdi)
1800 L(fwd_write_109bytes):
1801 lddqu -109(%rsi), %xmm0
1802 movdqu %xmm0, -109(%rdi)
1803 L(fwd_write_93bytes):
1804 lddqu -93(%rsi), %xmm0
1805 movdqu %xmm0, -93(%rdi)
1806 L(fwd_write_77bytes):
1807 lddqu -77(%rsi), %xmm0
1808 movdqu %xmm0, -77(%rdi)
1809 L(fwd_write_61bytes):
1810 lddqu -61(%rsi), %xmm0
1811 movdqu %xmm0, -61(%rdi)
1812 L(fwd_write_45bytes):
1813 lddqu -45(%rsi), %xmm0
1814 movdqu %xmm0, -45(%rdi)
1815 L(fwd_write_29bytes):
1816 lddqu -29(%rsi), %xmm0
1817 lddqu -16(%rsi), %xmm1
1818 movdqu %xmm0, -29(%rdi)
1819 movdqu %xmm1, -16(%rdi)
1820 ret
1821
/* Copy the 13 bytes that end at %rsi to the 13 bytes that end at %rdi
   using two overlapping 8-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
1822 .p2align 4
1823 L(fwd_write_13bytes):
1824 mov -13(%rsi), %rdx
1825 mov -8(%rsi), %rcx
1826 mov %rdx, -13(%rdi)
1827 mov %rcx, -8(%rdi)
1828 ret
1829
/* Forward tails for N = 140, 124, ..., 28.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 28
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1830 .p2align 4
1831 L(fwd_write_140bytes):
1832 lddqu -140(%rsi), %xmm0
1833 movdqu %xmm0, -140(%rdi)
1834 L(fwd_write_124bytes):
1835 lddqu -124(%rsi), %xmm0
1836 movdqu %xmm0, -124(%rdi)
1837 L(fwd_write_108bytes):
1838 lddqu -108(%rsi), %xmm0
1839 movdqu %xmm0, -108(%rdi)
1840 L(fwd_write_92bytes):
1841 lddqu -92(%rsi), %xmm0
1842 movdqu %xmm0, -92(%rdi)
1843 L(fwd_write_76bytes):
1844 lddqu -76(%rsi), %xmm0
1845 movdqu %xmm0, -76(%rdi)
1846 L(fwd_write_60bytes):
1847 lddqu -60(%rsi), %xmm0
1848 movdqu %xmm0, -60(%rdi)
1849 L(fwd_write_44bytes):
1850 lddqu -44(%rsi), %xmm0
1851 movdqu %xmm0, -44(%rdi)
1852 L(fwd_write_28bytes):
1853 lddqu -28(%rsi), %xmm0
1854 lddqu -16(%rsi), %xmm1
1855 movdqu %xmm0, -28(%rdi)
1856 movdqu %xmm1, -16(%rdi)
1857 ret
1858
/* Copy the 12 bytes that end at %rsi to the 12 bytes that end at %rdi
   using an 8-byte move followed by a 4-byte move; both loads precede
   both stores.  Clobbers %rdx and %rcx.  */
1859 .p2align 4
1860 L(fwd_write_12bytes):
1861 mov -12(%rsi), %rdx
1862 mov -4(%rsi), %ecx
1863 mov %rdx, -12(%rdi)
1864 mov %ecx, -4(%rdi)
1865 ret
1866
/* Forward tails for N = 139, 123, ..., 27.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 27
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1867 .p2align 4
1868 L(fwd_write_139bytes):
1869 lddqu -139(%rsi), %xmm0
1870 movdqu %xmm0, -139(%rdi)
1871 L(fwd_write_123bytes):
1872 lddqu -123(%rsi), %xmm0
1873 movdqu %xmm0, -123(%rdi)
1874 L(fwd_write_107bytes):
1875 lddqu -107(%rsi), %xmm0
1876 movdqu %xmm0, -107(%rdi)
1877 L(fwd_write_91bytes):
1878 lddqu -91(%rsi), %xmm0
1879 movdqu %xmm0, -91(%rdi)
1880 L(fwd_write_75bytes):
1881 lddqu -75(%rsi), %xmm0
1882 movdqu %xmm0, -75(%rdi)
1883 L(fwd_write_59bytes):
1884 lddqu -59(%rsi), %xmm0
1885 movdqu %xmm0, -59(%rdi)
1886 L(fwd_write_43bytes):
1887 lddqu -43(%rsi), %xmm0
1888 movdqu %xmm0, -43(%rdi)
1889 L(fwd_write_27bytes):
1890 lddqu -27(%rsi), %xmm0
1891 lddqu -16(%rsi), %xmm1
1892 movdqu %xmm0, -27(%rdi)
1893 movdqu %xmm1, -16(%rdi)
1894 ret
1895
/* Copy the 11 bytes that end at %rsi to the 11 bytes that end at %rdi
   using an 8-byte move and an overlapping 4-byte move; both loads
   precede both stores.  Clobbers %rdx and %rcx.  */
1896 .p2align 4
1897 L(fwd_write_11bytes):
1898 mov -11(%rsi), %rdx
1899 mov -4(%rsi), %ecx
1900 mov %rdx, -11(%rdi)
1901 mov %ecx, -4(%rdi)
1902 ret
1903
/* Forward tails for N = 138, 122, ..., 26.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 26
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1904 .p2align 4
1905 L(fwd_write_138bytes):
1906 lddqu -138(%rsi), %xmm0
1907 movdqu %xmm0, -138(%rdi)
1908 L(fwd_write_122bytes):
1909 lddqu -122(%rsi), %xmm0
1910 movdqu %xmm0, -122(%rdi)
1911 L(fwd_write_106bytes):
1912 lddqu -106(%rsi), %xmm0
1913 movdqu %xmm0, -106(%rdi)
1914 L(fwd_write_90bytes):
1915 lddqu -90(%rsi), %xmm0
1916 movdqu %xmm0, -90(%rdi)
1917 L(fwd_write_74bytes):
1918 lddqu -74(%rsi), %xmm0
1919 movdqu %xmm0, -74(%rdi)
1920 L(fwd_write_58bytes):
1921 lddqu -58(%rsi), %xmm0
1922 movdqu %xmm0, -58(%rdi)
1923 L(fwd_write_42bytes):
1924 lddqu -42(%rsi), %xmm0
1925 movdqu %xmm0, -42(%rdi)
1926 L(fwd_write_26bytes):
1927 lddqu -26(%rsi), %xmm0
1928 lddqu -16(%rsi), %xmm1
1929 movdqu %xmm0, -26(%rdi)
1930 movdqu %xmm1, -16(%rdi)
1931 ret
1932
/* Copy the 10 bytes that end at %rsi to the 10 bytes that end at %rdi
   using an 8-byte move and an overlapping 4-byte move; both loads
   precede both stores.  Clobbers %rdx and %rcx.  */
1933 .p2align 4
1934 L(fwd_write_10bytes):
1935 mov -10(%rsi), %rdx
1936 mov -4(%rsi), %ecx
1937 mov %rdx, -10(%rdi)
1938 mov %ecx, -4(%rdi)
1939 ret
1940
/* Forward tails for N = 137, 121, ..., 25.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 25
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1941 .p2align 4
1942 L(fwd_write_137bytes):
1943 lddqu -137(%rsi), %xmm0
1944 movdqu %xmm0, -137(%rdi)
1945 L(fwd_write_121bytes):
1946 lddqu -121(%rsi), %xmm0
1947 movdqu %xmm0, -121(%rdi)
1948 L(fwd_write_105bytes):
1949 lddqu -105(%rsi), %xmm0
1950 movdqu %xmm0, -105(%rdi)
1951 L(fwd_write_89bytes):
1952 lddqu -89(%rsi), %xmm0
1953 movdqu %xmm0, -89(%rdi)
1954 L(fwd_write_73bytes):
1955 lddqu -73(%rsi), %xmm0
1956 movdqu %xmm0, -73(%rdi)
1957 L(fwd_write_57bytes):
1958 lddqu -57(%rsi), %xmm0
1959 movdqu %xmm0, -57(%rdi)
1960 L(fwd_write_41bytes):
1961 lddqu -41(%rsi), %xmm0
1962 movdqu %xmm0, -41(%rdi)
1963 L(fwd_write_25bytes):
1964 lddqu -25(%rsi), %xmm0
1965 lddqu -16(%rsi), %xmm1
1966 movdqu %xmm0, -25(%rdi)
1967 movdqu %xmm1, -16(%rdi)
1968 ret
1969
/* Copy the 9 bytes that end at %rsi to the 9 bytes that end at %rdi
   using an 8-byte move and an overlapping 4-byte move; both loads
   precede both stores.  Clobbers %rdx and %rcx.  */
1970 .p2align 4
1971 L(fwd_write_9bytes):
1972 mov -9(%rsi), %rdx
1973 mov -4(%rsi), %ecx
1974 mov %rdx, -9(%rdi)
1975 mov %ecx, -4(%rdi)
1976 ret
1977
/* Forward tails for N = 136, 120, ..., 24.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 24
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
1978 .p2align 4
1979 L(fwd_write_136bytes):
1980 lddqu -136(%rsi), %xmm0
1981 movdqu %xmm0, -136(%rdi)
1982 L(fwd_write_120bytes):
1983 lddqu -120(%rsi), %xmm0
1984 movdqu %xmm0, -120(%rdi)
1985 L(fwd_write_104bytes):
1986 lddqu -104(%rsi), %xmm0
1987 movdqu %xmm0, -104(%rdi)
1988 L(fwd_write_88bytes):
1989 lddqu -88(%rsi), %xmm0
1990 movdqu %xmm0, -88(%rdi)
1991 L(fwd_write_72bytes):
1992 lddqu -72(%rsi), %xmm0
1993 movdqu %xmm0, -72(%rdi)
1994 L(fwd_write_56bytes):
1995 lddqu -56(%rsi), %xmm0
1996 movdqu %xmm0, -56(%rdi)
1997 L(fwd_write_40bytes):
1998 lddqu -40(%rsi), %xmm0
1999 movdqu %xmm0, -40(%rdi)
2000 L(fwd_write_24bytes):
2001 lddqu -24(%rsi), %xmm0
2002 lddqu -16(%rsi), %xmm1
2003 movdqu %xmm0, -24(%rdi)
2004 movdqu %xmm1, -16(%rdi)
2005 ret
2006
/* Copy the 8 bytes that end at %rsi to the 8 bytes that end at %rdi
   with a single 8-byte move.  Clobbers %rdx.  */
2007 .p2align 4
2008 L(fwd_write_8bytes):
2009 mov -8(%rsi), %rdx
2010 mov %rdx, -8(%rdi)
2011 ret
2012
/* Forward tails for N = 135, 119, ..., 23.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 23
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2013 .p2align 4
2014 L(fwd_write_135bytes):
2015 lddqu -135(%rsi), %xmm0
2016 movdqu %xmm0, -135(%rdi)
2017 L(fwd_write_119bytes):
2018 lddqu -119(%rsi), %xmm0
2019 movdqu %xmm0, -119(%rdi)
2020 L(fwd_write_103bytes):
2021 lddqu -103(%rsi), %xmm0
2022 movdqu %xmm0, -103(%rdi)
2023 L(fwd_write_87bytes):
2024 lddqu -87(%rsi), %xmm0
2025 movdqu %xmm0, -87(%rdi)
2026 L(fwd_write_71bytes):
2027 lddqu -71(%rsi), %xmm0
2028 movdqu %xmm0, -71(%rdi)
2029 L(fwd_write_55bytes):
2030 lddqu -55(%rsi), %xmm0
2031 movdqu %xmm0, -55(%rdi)
2032 L(fwd_write_39bytes):
2033 lddqu -39(%rsi), %xmm0
2034 movdqu %xmm0, -39(%rdi)
2035 L(fwd_write_23bytes):
2036 lddqu -23(%rsi), %xmm0
2037 lddqu -16(%rsi), %xmm1
2038 movdqu %xmm0, -23(%rdi)
2039 movdqu %xmm1, -16(%rdi)
2040 ret
2041
/* Copy the 7 bytes that end at %rsi to the 7 bytes that end at %rdi
   using two overlapping 4-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
2042 .p2align 4
2043 L(fwd_write_7bytes):
2044 mov -7(%rsi), %edx
2045 mov -4(%rsi), %ecx
2046 mov %edx, -7(%rdi)
2047 mov %ecx, -4(%rdi)
2048 ret
2049
/* Forward tails for N = 134, 118, ..., 22.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 22
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2050 .p2align 4
2051 L(fwd_write_134bytes):
2052 lddqu -134(%rsi), %xmm0
2053 movdqu %xmm0, -134(%rdi)
2054 L(fwd_write_118bytes):
2055 lddqu -118(%rsi), %xmm0
2056 movdqu %xmm0, -118(%rdi)
2057 L(fwd_write_102bytes):
2058 lddqu -102(%rsi), %xmm0
2059 movdqu %xmm0, -102(%rdi)
2060 L(fwd_write_86bytes):
2061 lddqu -86(%rsi), %xmm0
2062 movdqu %xmm0, -86(%rdi)
2063 L(fwd_write_70bytes):
2064 lddqu -70(%rsi), %xmm0
2065 movdqu %xmm0, -70(%rdi)
2066 L(fwd_write_54bytes):
2067 lddqu -54(%rsi), %xmm0
2068 movdqu %xmm0, -54(%rdi)
2069 L(fwd_write_38bytes):
2070 lddqu -38(%rsi), %xmm0
2071 movdqu %xmm0, -38(%rdi)
2072 L(fwd_write_22bytes):
2073 lddqu -22(%rsi), %xmm0
2074 lddqu -16(%rsi), %xmm1
2075 movdqu %xmm0, -22(%rdi)
2076 movdqu %xmm1, -16(%rdi)
2077 ret
2078
/* Copy the 6 bytes that end at %rsi to the 6 bytes that end at %rdi
   using two overlapping 4-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
2079 .p2align 4
2080 L(fwd_write_6bytes):
2081 mov -6(%rsi), %edx
2082 mov -4(%rsi), %ecx
2083 mov %edx, -6(%rdi)
2084 mov %ecx, -4(%rdi)
2085 ret
2086
/* Forward tails for N = 133, 117, ..., 21.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 21
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2087 .p2align 4
2088 L(fwd_write_133bytes):
2089 lddqu -133(%rsi), %xmm0
2090 movdqu %xmm0, -133(%rdi)
2091 L(fwd_write_117bytes):
2092 lddqu -117(%rsi), %xmm0
2093 movdqu %xmm0, -117(%rdi)
2094 L(fwd_write_101bytes):
2095 lddqu -101(%rsi), %xmm0
2096 movdqu %xmm0, -101(%rdi)
2097 L(fwd_write_85bytes):
2098 lddqu -85(%rsi), %xmm0
2099 movdqu %xmm0, -85(%rdi)
2100 L(fwd_write_69bytes):
2101 lddqu -69(%rsi), %xmm0
2102 movdqu %xmm0, -69(%rdi)
2103 L(fwd_write_53bytes):
2104 lddqu -53(%rsi), %xmm0
2105 movdqu %xmm0, -53(%rdi)
2106 L(fwd_write_37bytes):
2107 lddqu -37(%rsi), %xmm0
2108 movdqu %xmm0, -37(%rdi)
2109 L(fwd_write_21bytes):
2110 lddqu -21(%rsi), %xmm0
2111 lddqu -16(%rsi), %xmm1
2112 movdqu %xmm0, -21(%rdi)
2113 movdqu %xmm1, -16(%rdi)
2114 ret
2115
/* Copy the 5 bytes that end at %rsi to the 5 bytes that end at %rdi
   using two overlapping 4-byte moves; both loads precede both stores.
   Clobbers %rdx and %rcx.  */
2116 .p2align 4
2117 L(fwd_write_5bytes):
2118 mov -5(%rsi), %edx
2119 mov -4(%rsi), %ecx
2120 mov %edx, -5(%rdi)
2121 mov %ecx, -4(%rdi)
2122 ret
2123
/* Forward tails for N = 132, 116, ..., 20.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 20
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2124 .p2align 4
2125 L(fwd_write_132bytes):
2126 lddqu -132(%rsi), %xmm0
2127 movdqu %xmm0, -132(%rdi)
2128 L(fwd_write_116bytes):
2129 lddqu -116(%rsi), %xmm0
2130 movdqu %xmm0, -116(%rdi)
2131 L(fwd_write_100bytes):
2132 lddqu -100(%rsi), %xmm0
2133 movdqu %xmm0, -100(%rdi)
2134 L(fwd_write_84bytes):
2135 lddqu -84(%rsi), %xmm0
2136 movdqu %xmm0, -84(%rdi)
2137 L(fwd_write_68bytes):
2138 lddqu -68(%rsi), %xmm0
2139 movdqu %xmm0, -68(%rdi)
2140 L(fwd_write_52bytes):
2141 lddqu -52(%rsi), %xmm0
2142 movdqu %xmm0, -52(%rdi)
2143 L(fwd_write_36bytes):
2144 lddqu -36(%rsi), %xmm0
2145 movdqu %xmm0, -36(%rdi)
2146 L(fwd_write_20bytes):
2147 lddqu -20(%rsi), %xmm0
2148 lddqu -16(%rsi), %xmm1
2149 movdqu %xmm0, -20(%rdi)
2150 movdqu %xmm1, -16(%rdi)
2151 ret
2152
/* Copy the 4 bytes that end at %rsi to the 4 bytes that end at %rdi
   with a single 4-byte move.  Clobbers %rdx.  */
2153 .p2align 4
2154 L(fwd_write_4bytes):
2155 mov -4(%rsi), %edx
2156 mov %edx, -4(%rdi)
2157 ret
2158
/* Forward tails for N = 131, 115, ..., 19.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 19
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2159 .p2align 4
2160 L(fwd_write_131bytes):
2161 lddqu -131(%rsi), %xmm0
2162 movdqu %xmm0, -131(%rdi)
2163 L(fwd_write_115bytes):
2164 lddqu -115(%rsi), %xmm0
2165 movdqu %xmm0, -115(%rdi)
2166 L(fwd_write_99bytes):
2167 lddqu -99(%rsi), %xmm0
2168 movdqu %xmm0, -99(%rdi)
2169 L(fwd_write_83bytes):
2170 lddqu -83(%rsi), %xmm0
2171 movdqu %xmm0, -83(%rdi)
2172 L(fwd_write_67bytes):
2173 lddqu -67(%rsi), %xmm0
2174 movdqu %xmm0, -67(%rdi)
2175 L(fwd_write_51bytes):
2176 lddqu -51(%rsi), %xmm0
2177 movdqu %xmm0, -51(%rdi)
2178 L(fwd_write_35bytes):
2179 lddqu -35(%rsi), %xmm0
2180 movdqu %xmm0, -35(%rdi)
2181 L(fwd_write_19bytes):
2182 lddqu -19(%rsi), %xmm0
2183 lddqu -16(%rsi), %xmm1
2184 movdqu %xmm0, -19(%rdi)
2185 movdqu %xmm1, -16(%rdi)
2186 ret
2187
/* Copy the 3 bytes that end at %rsi to the 3 bytes that end at %rdi
   using two overlapping 2-byte moves; both loads precede both stores.
   Only the low 16 bits of %rdx and %rcx are written.  */
2188 .p2align 4
2189 L(fwd_write_3bytes):
2190 mov -3(%rsi), %dx
2191 mov -2(%rsi), %cx
2192 mov %dx, -3(%rdi)
2193 mov %cx, -2(%rdi)
2194 ret
2195
/* Forward tails for N = 130, 114, ..., 18.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 18
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2196 .p2align 4
2197 L(fwd_write_130bytes):
2198 lddqu -130(%rsi), %xmm0
2199 movdqu %xmm0, -130(%rdi)
2200 L(fwd_write_114bytes):
2201 lddqu -114(%rsi), %xmm0
2202 movdqu %xmm0, -114(%rdi)
2203 L(fwd_write_98bytes):
2204 lddqu -98(%rsi), %xmm0
2205 movdqu %xmm0, -98(%rdi)
2206 L(fwd_write_82bytes):
2207 lddqu -82(%rsi), %xmm0
2208 movdqu %xmm0, -82(%rdi)
2209 L(fwd_write_66bytes):
2210 lddqu -66(%rsi), %xmm0
2211 movdqu %xmm0, -66(%rdi)
2212 L(fwd_write_50bytes):
2213 lddqu -50(%rsi), %xmm0
2214 movdqu %xmm0, -50(%rdi)
2215 L(fwd_write_34bytes):
2216 lddqu -34(%rsi), %xmm0
2217 movdqu %xmm0, -34(%rdi)
2218 L(fwd_write_18bytes):
2219 lddqu -18(%rsi), %xmm0
2220 lddqu -16(%rsi), %xmm1
2221 movdqu %xmm0, -18(%rdi)
2222 movdqu %xmm1, -16(%rdi)
2223 ret
2224
/* Copy the 2 bytes that end at %rsi to the 2 bytes that end at %rdi.
   The load zero-extends into %edx, so all of %rdx is overwritten.  */
2225 .p2align 4
2226 L(fwd_write_2bytes):
2227 movzwl -2(%rsi), %edx
2228 mov %dx, -2(%rdi)
2229 ret
2230
/* Forward tails for N = 129, 113, ..., 17.  Entering at
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N
   bytes that end at %rdi: each label moves 16 bytes at offset -N
   through xmm0 and falls through to the label for N-16.  The last 17
   bytes are done as two overlapping 16-byte moves, with both loads
   issued before either store.  Clobbers xmm0 and xmm1.  */
2231 .p2align 4
2232 L(fwd_write_129bytes):
2233 lddqu -129(%rsi), %xmm0
2234 movdqu %xmm0, -129(%rdi)
2235 L(fwd_write_113bytes):
2236 lddqu -113(%rsi), %xmm0
2237 movdqu %xmm0, -113(%rdi)
2238 L(fwd_write_97bytes):
2239 lddqu -97(%rsi), %xmm0
2240 movdqu %xmm0, -97(%rdi)
2241 L(fwd_write_81bytes):
2242 lddqu -81(%rsi), %xmm0
2243 movdqu %xmm0, -81(%rdi)
2244 L(fwd_write_65bytes):
2245 lddqu -65(%rsi), %xmm0
2246 movdqu %xmm0, -65(%rdi)
2247 L(fwd_write_49bytes):
2248 lddqu -49(%rsi), %xmm0
2249 movdqu %xmm0, -49(%rdi)
2250 L(fwd_write_33bytes):
2251 lddqu -33(%rsi), %xmm0
2252 movdqu %xmm0, -33(%rdi)
2253 L(fwd_write_17bytes):
2254 lddqu -17(%rsi), %xmm0
2255 lddqu -16(%rsi), %xmm1
2256 movdqu %xmm0, -17(%rdi)
2257 movdqu %xmm1, -16(%rdi)
2258 ret
2259
/* Copy the single byte just below %rsi to the byte just below %rdi.
   The load zero-extends into %edx, so all of %rdx is overwritten.  */
2260 .p2align 4
2261 L(fwd_write_1bytes):
2262 movzbl -1(%rsi), %edx
2263 mov %dl, -1(%rdi)
2264 ret
2265
/* Backward tails for N = 128, 112, ..., 16, 0.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest 16-byte piece first: each label
   moves 16 bytes at offset N-16 through xmm0 (unaligned load and
   store) and falls through to the label for N-16.
   L(bwd_write_0bytes) only returns.  */
2266 .p2align 4
2267 L(bwd_write_128bytes):
2268 lddqu 112(%rsi), %xmm0
2269 movdqu %xmm0, 112(%rdi)
2270 L(bwd_write_112bytes):
2271 lddqu 96(%rsi), %xmm0
2272 movdqu %xmm0, 96(%rdi)
2273 L(bwd_write_96bytes):
2274 lddqu 80(%rsi), %xmm0
2275 movdqu %xmm0, 80(%rdi)
2276 L(bwd_write_80bytes):
2277 lddqu 64(%rsi), %xmm0
2278 movdqu %xmm0, 64(%rdi)
2279 L(bwd_write_64bytes):
2280 lddqu 48(%rsi), %xmm0
2281 movdqu %xmm0, 48(%rdi)
2282 L(bwd_write_48bytes):
2283 lddqu 32(%rsi), %xmm0
2284 movdqu %xmm0, 32(%rdi)
2285 L(bwd_write_32bytes):
2286 lddqu 16(%rsi), %xmm0
2287 movdqu %xmm0, 16(%rdi)
2288 L(bwd_write_16bytes):
2289 lddqu (%rsi), %xmm0
2290 movdqu %xmm0, (%rdi)
2291 L(bwd_write_0bytes):
2292 ret
2293
/* Backward tails for N = 143, 127, ..., 31.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 31 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2294 .p2align 4
2295 L(bwd_write_143bytes):
2296 lddqu 127(%rsi), %xmm0
2297 movdqu %xmm0, 127(%rdi)
2298 L(bwd_write_127bytes):
2299 lddqu 111(%rsi), %xmm0
2300 movdqu %xmm0, 111(%rdi)
2301 L(bwd_write_111bytes):
2302 lddqu 95(%rsi), %xmm0
2303 movdqu %xmm0, 95(%rdi)
2304 L(bwd_write_95bytes):
2305 lddqu 79(%rsi), %xmm0
2306 movdqu %xmm0, 79(%rdi)
2307 L(bwd_write_79bytes):
2308 lddqu 63(%rsi), %xmm0
2309 movdqu %xmm0, 63(%rdi)
2310 L(bwd_write_63bytes):
2311 lddqu 47(%rsi), %xmm0
2312 movdqu %xmm0, 47(%rdi)
2313 L(bwd_write_47bytes):
2314 lddqu 31(%rsi), %xmm0
2315 movdqu %xmm0, 31(%rdi)
2316 L(bwd_write_31bytes):
2317 lddqu 15(%rsi), %xmm0
2318 lddqu (%rsi), %xmm1
2319 movdqu %xmm0, 15(%rdi)
2320 movdqu %xmm1, (%rdi)
2321 ret
2322
2323
/* Copy the 15 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves; both loads precede both stores.  Clobbers %rdx and
   %rcx.  */
2324 .p2align 4
2325 L(bwd_write_15bytes):
2326 mov 7(%rsi), %rdx
2327 mov (%rsi), %rcx
2328 mov %rdx, 7(%rdi)
2329 mov %rcx, (%rdi)
2330 ret
2331
/* Backward tails for N = 142, 126, ..., 30.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 30 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2332 .p2align 4
2333 L(bwd_write_142bytes):
2334 lddqu 126(%rsi), %xmm0
2335 movdqu %xmm0, 126(%rdi)
2336 L(bwd_write_126bytes):
2337 lddqu 110(%rsi), %xmm0
2338 movdqu %xmm0, 110(%rdi)
2339 L(bwd_write_110bytes):
2340 lddqu 94(%rsi), %xmm0
2341 movdqu %xmm0, 94(%rdi)
2342 L(bwd_write_94bytes):
2343 lddqu 78(%rsi), %xmm0
2344 movdqu %xmm0, 78(%rdi)
2345 L(bwd_write_78bytes):
2346 lddqu 62(%rsi), %xmm0
2347 movdqu %xmm0, 62(%rdi)
2348 L(bwd_write_62bytes):
2349 lddqu 46(%rsi), %xmm0
2350 movdqu %xmm0, 46(%rdi)
2351 L(bwd_write_46bytes):
2352 lddqu 30(%rsi), %xmm0
2353 movdqu %xmm0, 30(%rdi)
2354 L(bwd_write_30bytes):
2355 lddqu 14(%rsi), %xmm0
2356 lddqu (%rsi), %xmm1
2357 movdqu %xmm0, 14(%rdi)
2358 movdqu %xmm1, (%rdi)
2359 ret
2360
/* Copy the 14 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves; both loads precede both stores.  Clobbers %rdx and
   %rcx.  */
2361 .p2align 4
2362 L(bwd_write_14bytes):
2363 mov 6(%rsi), %rdx
2364 mov (%rsi), %rcx
2365 mov %rdx, 6(%rdi)
2366 mov %rcx, (%rdi)
2367 ret
2368
/* Backward tails for N = 141, 125, ..., 29.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 29 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2369 .p2align 4
2370 L(bwd_write_141bytes):
2371 lddqu 125(%rsi), %xmm0
2372 movdqu %xmm0, 125(%rdi)
2373 L(bwd_write_125bytes):
2374 lddqu 109(%rsi), %xmm0
2375 movdqu %xmm0, 109(%rdi)
2376 L(bwd_write_109bytes):
2377 lddqu 93(%rsi), %xmm0
2378 movdqu %xmm0, 93(%rdi)
2379 L(bwd_write_93bytes):
2380 lddqu 77(%rsi), %xmm0
2381 movdqu %xmm0, 77(%rdi)
2382 L(bwd_write_77bytes):
2383 lddqu 61(%rsi), %xmm0
2384 movdqu %xmm0, 61(%rdi)
2385 L(bwd_write_61bytes):
2386 lddqu 45(%rsi), %xmm0
2387 movdqu %xmm0, 45(%rdi)
2388 L(bwd_write_45bytes):
2389 lddqu 29(%rsi), %xmm0
2390 movdqu %xmm0, 29(%rdi)
2391 L(bwd_write_29bytes):
2392 lddqu 13(%rsi), %xmm0
2393 lddqu (%rsi), %xmm1
2394 movdqu %xmm0, 13(%rdi)
2395 movdqu %xmm1, (%rdi)
2396 ret
2397
/* Copy the 13 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves; both loads precede both stores.  Clobbers %rdx and
   %rcx.  */
2398 .p2align 4
2399 L(bwd_write_13bytes):
2400 mov 5(%rsi), %rdx
2401 mov (%rsi), %rcx
2402 mov %rdx, 5(%rdi)
2403 mov %rcx, (%rdi)
2404 ret
2405
/* Backward tails for N = 140, 124, ..., 28.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 28 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2406 .p2align 4
2407 L(bwd_write_140bytes):
2408 lddqu 124(%rsi), %xmm0
2409 movdqu %xmm0, 124(%rdi)
2410 L(bwd_write_124bytes):
2411 lddqu 108(%rsi), %xmm0
2412 movdqu %xmm0, 108(%rdi)
2413 L(bwd_write_108bytes):
2414 lddqu 92(%rsi), %xmm0
2415 movdqu %xmm0, 92(%rdi)
2416 L(bwd_write_92bytes):
2417 lddqu 76(%rsi), %xmm0
2418 movdqu %xmm0, 76(%rdi)
2419 L(bwd_write_76bytes):
2420 lddqu 60(%rsi), %xmm0
2421 movdqu %xmm0, 60(%rdi)
2422 L(bwd_write_60bytes):
2423 lddqu 44(%rsi), %xmm0
2424 movdqu %xmm0, 44(%rdi)
2425 L(bwd_write_44bytes):
2426 lddqu 28(%rsi), %xmm0
2427 movdqu %xmm0, 28(%rdi)
2428 L(bwd_write_28bytes):
2429 lddqu 12(%rsi), %xmm0
2430 lddqu (%rsi), %xmm1
2431 movdqu %xmm0, 12(%rdi)
2432 movdqu %xmm1, (%rdi)
2433 ret
2434
/* Copy the 12 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves (bytes 4..11 and bytes 0..7); both loads precede both
   stores.  Clobbers %rdx and %rcx.  */
2435 .p2align 4
2436 L(bwd_write_12bytes):
2437 mov 4(%rsi), %rdx
2438 mov (%rsi), %rcx
2439 mov %rdx, 4(%rdi)
2440 mov %rcx, (%rdi)
2441 ret
2442
/* Backward tails for N = 139, 123, ..., 27.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 27 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2443 .p2align 4
2444 L(bwd_write_139bytes):
2445 lddqu 123(%rsi), %xmm0
2446 movdqu %xmm0, 123(%rdi)
2447 L(bwd_write_123bytes):
2448 lddqu 107(%rsi), %xmm0
2449 movdqu %xmm0, 107(%rdi)
2450 L(bwd_write_107bytes):
2451 lddqu 91(%rsi), %xmm0
2452 movdqu %xmm0, 91(%rdi)
2453 L(bwd_write_91bytes):
2454 lddqu 75(%rsi), %xmm0
2455 movdqu %xmm0, 75(%rdi)
2456 L(bwd_write_75bytes):
2457 lddqu 59(%rsi), %xmm0
2458 movdqu %xmm0, 59(%rdi)
2459 L(bwd_write_59bytes):
2460 lddqu 43(%rsi), %xmm0
2461 movdqu %xmm0, 43(%rdi)
2462 L(bwd_write_43bytes):
2463 lddqu 27(%rsi), %xmm0
2464 movdqu %xmm0, 27(%rdi)
2465 L(bwd_write_27bytes):
2466 lddqu 11(%rsi), %xmm0
2467 lddqu (%rsi), %xmm1
2468 movdqu %xmm0, 11(%rdi)
2469 movdqu %xmm1, (%rdi)
2470 ret
2471
/* Copy the 11 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves (bytes 3..10 and bytes 0..7); both loads precede both
   stores.  Clobbers %rdx and %rcx.  */
2472 .p2align 4
2473 L(bwd_write_11bytes):
2474 mov 3(%rsi), %rdx
2475 mov (%rsi), %rcx
2476 mov %rdx, 3(%rdi)
2477 mov %rcx, (%rdi)
2478 ret
2479
/* Backward tails for N = 138, 122, ..., 26.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 26 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2480 .p2align 4
2481 L(bwd_write_138bytes):
2482 lddqu 122(%rsi), %xmm0
2483 movdqu %xmm0, 122(%rdi)
2484 L(bwd_write_122bytes):
2485 lddqu 106(%rsi), %xmm0
2486 movdqu %xmm0, 106(%rdi)
2487 L(bwd_write_106bytes):
2488 lddqu 90(%rsi), %xmm0
2489 movdqu %xmm0, 90(%rdi)
2490 L(bwd_write_90bytes):
2491 lddqu 74(%rsi), %xmm0
2492 movdqu %xmm0, 74(%rdi)
2493 L(bwd_write_74bytes):
2494 lddqu 58(%rsi), %xmm0
2495 movdqu %xmm0, 58(%rdi)
2496 L(bwd_write_58bytes):
2497 lddqu 42(%rsi), %xmm0
2498 movdqu %xmm0, 42(%rdi)
2499 L(bwd_write_42bytes):
2500 lddqu 26(%rsi), %xmm0
2501 movdqu %xmm0, 26(%rdi)
2502 L(bwd_write_26bytes):
2503 lddqu 10(%rsi), %xmm0
2504 lddqu (%rsi), %xmm1
2505 movdqu %xmm0, 10(%rdi)
2506 movdqu %xmm1, (%rdi)
2507 ret
2508
/* Copy the 10 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves (bytes 2..9 and bytes 0..7); both loads precede both
   stores.  Clobbers %rdx and %rcx.  */
2509 .p2align 4
2510 L(bwd_write_10bytes):
2511 mov 2(%rsi), %rdx
2512 mov (%rsi), %rcx
2513 mov %rdx, 2(%rdi)
2514 mov %rcx, (%rdi)
2515 ret
2516
/* Backward tails for N = 137, 121, ..., 25.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 25 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2517 .p2align 4
2518 L(bwd_write_137bytes):
2519 lddqu 121(%rsi), %xmm0
2520 movdqu %xmm0, 121(%rdi)
2521 L(bwd_write_121bytes):
2522 lddqu 105(%rsi), %xmm0
2523 movdqu %xmm0, 105(%rdi)
2524 L(bwd_write_105bytes):
2525 lddqu 89(%rsi), %xmm0
2526 movdqu %xmm0, 89(%rdi)
2527 L(bwd_write_89bytes):
2528 lddqu 73(%rsi), %xmm0
2529 movdqu %xmm0, 73(%rdi)
2530 L(bwd_write_73bytes):
2531 lddqu 57(%rsi), %xmm0
2532 movdqu %xmm0, 57(%rdi)
2533 L(bwd_write_57bytes):
2534 lddqu 41(%rsi), %xmm0
2535 movdqu %xmm0, 41(%rdi)
2536 L(bwd_write_41bytes):
2537 lddqu 25(%rsi), %xmm0
2538 movdqu %xmm0, 25(%rdi)
2539 L(bwd_write_25bytes):
2540 lddqu 9(%rsi), %xmm0
2541 lddqu (%rsi), %xmm1
2542 movdqu %xmm0, 9(%rdi)
2543 movdqu %xmm1, (%rdi)
2544 ret
2545
/* Copy the 9 bytes that start at %rsi to %rdi using two overlapping
   8-byte moves (bytes 1..8 and bytes 0..7); both loads precede both
   stores.  Clobbers %rdx and %rcx.  */
2546 .p2align 4
2547 L(bwd_write_9bytes):
2548 mov 1(%rsi), %rdx
2549 mov (%rsi), %rcx
2550 mov %rdx, 1(%rdi)
2551 mov %rcx, (%rdi)
2552 ret
2553
/* Backward tails for N = 136, 120, ..., 24.  Entering at
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest piece first: each label moves 16
   bytes at offset N-16 through xmm0 and falls through to the label for
   N-16.  The first 24 bytes are done as two overlapping 16-byte moves,
   with both loads issued before either store.  Clobbers xmm0 and
   xmm1.  */
2554 .p2align 4
2555 L(bwd_write_136bytes):
2556 lddqu 120(%rsi), %xmm0
2557 movdqu %xmm0, 120(%rdi)
2558 L(bwd_write_120bytes):
2559 lddqu 104(%rsi), %xmm0
2560 movdqu %xmm0, 104(%rdi)
2561 L(bwd_write_104bytes):
2562 lddqu 88(%rsi), %xmm0
2563 movdqu %xmm0, 88(%rdi)
2564 L(bwd_write_88bytes):
2565 lddqu 72(%rsi), %xmm0
2566 movdqu %xmm0, 72(%rdi)
2567 L(bwd_write_72bytes):
2568 lddqu 56(%rsi), %xmm0
2569 movdqu %xmm0, 56(%rdi)
2570 L(bwd_write_56bytes):
2571 lddqu 40(%rsi), %xmm0
2572 movdqu %xmm0, 40(%rdi)
2573 L(bwd_write_40bytes):
2574 lddqu 24(%rsi), %xmm0
2575 movdqu %xmm0, 24(%rdi)
2576 L(bwd_write_24bytes):
2577 lddqu 8(%rsi), %xmm0
2578 lddqu (%rsi), %xmm1
2579 movdqu %xmm0, 8(%rdi)
2580 movdqu %xmm1, (%rdi)
2581 ret
2582
2583 .p2align 4
/* Copy exactly 8 bytes from (%rsi) to (%rdi) with a single 64-bit
   load/store pair.  Clobbers %rdx.  The entry is aligned to 16 bytes.  */
2584 L(bwd_write_8bytes):
2585 mov (%rsi), %rdx
2586 mov %rdx, (%rdi)
2587 ret
2588
2589 .p2align 4
/* Fall-through chain for 135, 119, 103, 87, 71, 55, 39 and 23 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2590 L(bwd_write_135bytes):
2591 lddqu 119(%rsi), %xmm0
2592 movdqu %xmm0, 119(%rdi)
2593 L(bwd_write_119bytes):
2594 lddqu 103(%rsi), %xmm0
2595 movdqu %xmm0, 103(%rdi)
2596 L(bwd_write_103bytes):
2597 lddqu 87(%rsi), %xmm0
2598 movdqu %xmm0, 87(%rdi)
2599 L(bwd_write_87bytes):
2600 lddqu 71(%rsi), %xmm0
2601 movdqu %xmm0, 71(%rdi)
2602 L(bwd_write_71bytes):
2603 lddqu 55(%rsi), %xmm0
2604 movdqu %xmm0, 55(%rdi)
2605 L(bwd_write_55bytes):
2606 lddqu 39(%rsi), %xmm0
2607 movdqu %xmm0, 39(%rdi)
2608 L(bwd_write_39bytes):
2609 lddqu 23(%rsi), %xmm0
2610 movdqu %xmm0, 23(%rdi)
2611 L(bwd_write_23bytes):
/* Last 23 bytes: two overlapping 16-byte chunks, [7, 23) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2612 lddqu 7(%rsi), %xmm0
2613 lddqu (%rsi), %xmm1
2614 movdqu %xmm0, 7(%rdi)
2615 movdqu %xmm1, (%rdi)
2616 ret
2617
2618 .p2align 4
/* Copy exactly 7 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves, bytes [3, 7) and [0, 4).  Both loads complete before either
   store.  Clobbers %rdx and %rcx (the 32-bit writes zero their upper
   halves).  The entry is aligned to 16 bytes.  */
2619 L(bwd_write_7bytes):
2620 mov 3(%rsi), %edx
2621 mov (%rsi), %ecx
2622 mov %edx, 3(%rdi)
2623 mov %ecx, (%rdi)
2624 ret
2625
2626 .p2align 4
/* Fall-through chain for 134, 118, 102, 86, 70, 54, 38 and 22 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2627 L(bwd_write_134bytes):
2628 lddqu 118(%rsi), %xmm0
2629 movdqu %xmm0, 118(%rdi)
2630 L(bwd_write_118bytes):
2631 lddqu 102(%rsi), %xmm0
2632 movdqu %xmm0, 102(%rdi)
2633 L(bwd_write_102bytes):
2634 lddqu 86(%rsi), %xmm0
2635 movdqu %xmm0, 86(%rdi)
2636 L(bwd_write_86bytes):
2637 lddqu 70(%rsi), %xmm0
2638 movdqu %xmm0, 70(%rdi)
2639 L(bwd_write_70bytes):
2640 lddqu 54(%rsi), %xmm0
2641 movdqu %xmm0, 54(%rdi)
2642 L(bwd_write_54bytes):
2643 lddqu 38(%rsi), %xmm0
2644 movdqu %xmm0, 38(%rdi)
2645 L(bwd_write_38bytes):
2646 lddqu 22(%rsi), %xmm0
2647 movdqu %xmm0, 22(%rdi)
2648 L(bwd_write_22bytes):
/* Last 22 bytes: two overlapping 16-byte chunks, [6, 22) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2649 lddqu 6(%rsi), %xmm0
2650 lddqu (%rsi), %xmm1
2651 movdqu %xmm0, 6(%rdi)
2652 movdqu %xmm1, (%rdi)
2653 ret
2654
2655 .p2align 4
/* Copy exactly 6 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves, bytes [2, 6) and [0, 4).  Both loads complete before either
   store.  Clobbers %rdx and %rcx (the 32-bit writes zero their upper
   halves).  The entry is aligned to 16 bytes.  */
2656 L(bwd_write_6bytes):
2657 mov 2(%rsi), %edx
2658 mov (%rsi), %ecx
2659 mov %edx, 2(%rdi)
2660 mov %ecx, (%rdi)
2661 ret
2662
2663 .p2align 4
/* Fall-through chain for 133, 117, 101, 85, 69, 53, 37 and 21 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2664 L(bwd_write_133bytes):
2665 lddqu 117(%rsi), %xmm0
2666 movdqu %xmm0, 117(%rdi)
2667 L(bwd_write_117bytes):
2668 lddqu 101(%rsi), %xmm0
2669 movdqu %xmm0, 101(%rdi)
2670 L(bwd_write_101bytes):
2671 lddqu 85(%rsi), %xmm0
2672 movdqu %xmm0, 85(%rdi)
2673 L(bwd_write_85bytes):
2674 lddqu 69(%rsi), %xmm0
2675 movdqu %xmm0, 69(%rdi)
2676 L(bwd_write_69bytes):
2677 lddqu 53(%rsi), %xmm0
2678 movdqu %xmm0, 53(%rdi)
2679 L(bwd_write_53bytes):
2680 lddqu 37(%rsi), %xmm0
2681 movdqu %xmm0, 37(%rdi)
2682 L(bwd_write_37bytes):
2683 lddqu 21(%rsi), %xmm0
2684 movdqu %xmm0, 21(%rdi)
2685 L(bwd_write_21bytes):
/* Last 21 bytes: two overlapping 16-byte chunks, [5, 21) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2686 lddqu 5(%rsi), %xmm0
2687 lddqu (%rsi), %xmm1
2688 movdqu %xmm0, 5(%rdi)
2689 movdqu %xmm1, (%rdi)
2690 ret
2691
2692 .p2align 4
/* Copy exactly 5 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves, bytes [1, 5) and [0, 4).  Both loads complete before either
   store.  Clobbers %rdx and %rcx (the 32-bit writes zero their upper
   halves).  The entry is aligned to 16 bytes.  */
2693 L(bwd_write_5bytes):
2694 mov 1(%rsi), %edx
2695 mov (%rsi), %ecx
2696 mov %edx, 1(%rdi)
2697 mov %ecx, (%rdi)
2698 ret
2699
2700 .p2align 4
/* Fall-through chain for 132, 116, 100, 84, 68, 52, 36 and 20 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2701 L(bwd_write_132bytes):
2702 lddqu 116(%rsi), %xmm0
2703 movdqu %xmm0, 116(%rdi)
2704 L(bwd_write_116bytes):
2705 lddqu 100(%rsi), %xmm0
2706 movdqu %xmm0, 100(%rdi)
2707 L(bwd_write_100bytes):
2708 lddqu 84(%rsi), %xmm0
2709 movdqu %xmm0, 84(%rdi)
2710 L(bwd_write_84bytes):
2711 lddqu 68(%rsi), %xmm0
2712 movdqu %xmm0, 68(%rdi)
2713 L(bwd_write_68bytes):
2714 lddqu 52(%rsi), %xmm0
2715 movdqu %xmm0, 52(%rdi)
2716 L(bwd_write_52bytes):
2717 lddqu 36(%rsi), %xmm0
2718 movdqu %xmm0, 36(%rdi)
2719 L(bwd_write_36bytes):
2720 lddqu 20(%rsi), %xmm0
2721 movdqu %xmm0, 20(%rdi)
2722 L(bwd_write_20bytes):
/* Last 20 bytes: two overlapping 16-byte chunks, [4, 20) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2723 lddqu 4(%rsi), %xmm0
2724 lddqu (%rsi), %xmm1
2725 movdqu %xmm0, 4(%rdi)
2726 movdqu %xmm1, (%rdi)
2727 ret
2728
2729 .p2align 4
/* Copy exactly 4 bytes from (%rsi) to (%rdi) with a single 32-bit
   load/store pair.  Clobbers %rdx (the 32-bit write zeroes its upper
   half).  The entry is aligned to 16 bytes.  */
2730 L(bwd_write_4bytes):
2731 mov (%rsi), %edx
2732 mov %edx, (%rdi)
2733 ret
2734
2735 .p2align 4
/* Fall-through chain for 131, 115, 99, 83, 67, 51, 35 and 19 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2736 L(bwd_write_131bytes):
2737 lddqu 115(%rsi), %xmm0
2738 movdqu %xmm0, 115(%rdi)
2739 L(bwd_write_115bytes):
2740 lddqu 99(%rsi), %xmm0
2741 movdqu %xmm0, 99(%rdi)
2742 L(bwd_write_99bytes):
2743 lddqu 83(%rsi), %xmm0
2744 movdqu %xmm0, 83(%rdi)
2745 L(bwd_write_83bytes):
2746 lddqu 67(%rsi), %xmm0
2747 movdqu %xmm0, 67(%rdi)
2748 L(bwd_write_67bytes):
2749 lddqu 51(%rsi), %xmm0
2750 movdqu %xmm0, 51(%rdi)
2751 L(bwd_write_51bytes):
2752 lddqu 35(%rsi), %xmm0
2753 movdqu %xmm0, 35(%rdi)
2754 L(bwd_write_35bytes):
2755 lddqu 19(%rsi), %xmm0
2756 movdqu %xmm0, 19(%rdi)
2757 L(bwd_write_19bytes):
/* Last 19 bytes: two overlapping 16-byte chunks, [3, 19) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2758 lddqu 3(%rsi), %xmm0
2759 lddqu (%rsi), %xmm1
2760 movdqu %xmm0, 3(%rdi)
2761 movdqu %xmm1, (%rdi)
2762 ret
2763
2764 .p2align 4
/* Copy exactly 3 bytes from (%rsi) to (%rdi) as two overlapping 2-byte
   moves, bytes [1, 3) and [0, 2).  Both loads complete before either
   store.  Only the low 16 bits of %rdx and %rcx are written (%dx, %cx);
   their upper bits keep whatever value they had.  The entry is aligned
   to 16 bytes.  */
2765 L(bwd_write_3bytes):
2766 mov 1(%rsi), %dx
2767 mov (%rsi), %cx
2768 mov %dx, 1(%rdi)
2769 mov %cx, (%rdi)
2770 ret
2771
2772 .p2align 4
/* Fall-through chain for 130, 114, 98, 82, 66, 50, 34 and 18 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2773 L(bwd_write_130bytes):
2774 lddqu 114(%rsi), %xmm0
2775 movdqu %xmm0, 114(%rdi)
2776 L(bwd_write_114bytes):
2777 lddqu 98(%rsi), %xmm0
2778 movdqu %xmm0, 98(%rdi)
2779 L(bwd_write_98bytes):
2780 lddqu 82(%rsi), %xmm0
2781 movdqu %xmm0, 82(%rdi)
2782 L(bwd_write_82bytes):
2783 lddqu 66(%rsi), %xmm0
2784 movdqu %xmm0, 66(%rdi)
2785 L(bwd_write_66bytes):
2786 lddqu 50(%rsi), %xmm0
2787 movdqu %xmm0, 50(%rdi)
2788 L(bwd_write_50bytes):
2789 lddqu 34(%rsi), %xmm0
2790 movdqu %xmm0, 34(%rdi)
2791 L(bwd_write_34bytes):
2792 lddqu 18(%rsi), %xmm0
2793 movdqu %xmm0, 18(%rdi)
2794 L(bwd_write_18bytes):
/* Last 18 bytes: two overlapping 16-byte chunks, [2, 18) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2795 lddqu 2(%rsi), %xmm0
2796 lddqu (%rsi), %xmm1
2797 movdqu %xmm0, 2(%rdi)
2798 movdqu %xmm1, (%rdi)
2799 ret
2800
2801 .p2align 4
/* Copy exactly 2 bytes from (%rsi) to (%rdi).  The 16-bit load is
   zero-extended into %edx (movzwl) and its low word %dx is stored.
   Clobbers %rdx.  The entry is aligned to 16 bytes.  */
2802 L(bwd_write_2bytes):
2803 movzwl (%rsi), %edx
2804 mov %dx, (%rdi)
2805 ret
2806
2807 .p2align 4
/* Fall-through chain for 129, 113, 97, 81, 65, 49, 33 and 17 bytes.
   Entering at L(bwd_write_Nbytes) copies bytes [0, N) from (%rsi) to
   (%rdi): each step moves the 16 bytes at offset N-16 through %xmm0
   with unaligned accesses, then falls into the label for N-16.  Steps
   run from the highest offset down to the lowest.  Clobbers %xmm0 and
   %xmm1.
   NOTE(review): assumes %rsi/%rdi address the first of the N source and
   destination bytes; the dispatching code is outside this chunk.  */
2808 L(bwd_write_129bytes):
2809 lddqu 113(%rsi), %xmm0
2810 movdqu %xmm0, 113(%rdi)
2811 L(bwd_write_113bytes):
2812 lddqu 97(%rsi), %xmm0
2813 movdqu %xmm0, 97(%rdi)
2814 L(bwd_write_97bytes):
2815 lddqu 81(%rsi), %xmm0
2816 movdqu %xmm0, 81(%rdi)
2817 L(bwd_write_81bytes):
2818 lddqu 65(%rsi), %xmm0
2819 movdqu %xmm0, 65(%rdi)
2820 L(bwd_write_65bytes):
2821 lddqu 49(%rsi), %xmm0
2822 movdqu %xmm0, 49(%rdi)
2823 L(bwd_write_49bytes):
2824 lddqu 33(%rsi), %xmm0
2825 movdqu %xmm0, 33(%rdi)
2826 L(bwd_write_33bytes):
2827 lddqu 17(%rsi), %xmm0
2828 movdqu %xmm0, 17(%rdi)
2829 L(bwd_write_17bytes):
/* Last 17 bytes: two overlapping 16-byte chunks, [1, 17) and [0, 16).
   Both loads are issued before either store.
   NOTE(review): presumably so the tail stays correct when the buffers
   overlap (the file has a USE_AS_MEMMOVE variant) -- confirm.  */
2830 lddqu 1(%rsi), %xmm0
2831 lddqu (%rsi), %xmm1
2832 movdqu %xmm0, 1(%rdi)
2833 movdqu %xmm1, (%rdi)
2834 ret
2835
2836 .p2align 4
/* Copy exactly 1 byte from (%rsi) to (%rdi).  The byte is zero-extended
   into %edx (movzbl) and its low byte %dl is stored.  Clobbers %rdx.
   The entry is aligned to 16 bytes.  */
2837 L(bwd_write_1bytes):
2838 movzbl (%rsi), %edx
2839 mov %dl, (%rdi)
2840 ret
2841
2842 END (MEMCPY)
2843
2844 .section .rodata.ssse3,"a",@progbits
/* Everything from here on is emitted into .rodata.ssse3, an allocatable
   section with neither the writable nor the executable flag set.  */
2845 .p2align 3
/* Jump table with 144 entries for the L(bwd_write_Nbytes) handlers,
   N = 0..143.  Each .int holds the 32-bit distance from the table base
   to the handler, because JMPTBL (I, B) expands to I - B.  Entry order
   is significant: slot N must name L(bwd_write_Nbytes).  The table
   starts on an 8-byte boundary.
   NOTE(review): presumably consumed through BRANCH_TO_JMPTBL_ENTRY with
   the number of remaining bytes as index; the dispatch site is outside
   this chunk -- confirm.  */
2846 L(table_144_bytes_bwd):
2847 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2848 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2849 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2850 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2851 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2852 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2853 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2854 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2855 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2982 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2983 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2984 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2985 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2986 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2987 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2988 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2989 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2990 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2991
2992 .p2align 3
/* Jump table with 144 entries for the L(fwd_write_Nbytes) handlers,
   N = 0..143.  Each .int holds the 32-bit distance from the table base
   to the handler, because JMPTBL (I, B) expands to I - B.  Entry order
   is significant: slot N must name L(fwd_write_Nbytes).  The table
   starts on an 8-byte boundary.
   NOTE(review): the L(fwd_write_*) handlers and the dispatch site are
   defined outside this chunk; presumably the index is the number of
   remaining bytes, used via BRANCH_TO_JMPTBL_ENTRY -- confirm.  */
2993 L(table_144_bytes_fwd):
2994 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
2995 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
2996 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
2997 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
2998 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
2999 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
3000 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
3001 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
3002 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3129 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3130 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3131 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3132 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3133 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3134 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3135 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3136 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3137 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3138
3139 .p2align 3
/* Jump table with 16 entries for the L(shl_N) handlers, N = 0..15.
   Each .int holds the 32-bit distance from the table base to the
   handler, because JMPTBL (I, B) expands to I - B.  Entry order is
   significant: slot N must name L(shl_N).  The table starts on an
   8-byte boundary.
   NOTE(review): the L(shl_*) handlers and the dispatch site are outside
   this chunk; the index is presumably a 0..15 relative misalignment of
   the two buffers -- confirm.  */
3140 L(shl_table_fwd):
3141 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3142 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3143 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3144 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3145 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3146 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3147 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3148 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3149 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3150 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3151 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3152 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3153 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3154 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3155 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3156 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3157
3158 .p2align 3
/* Jump table with 16 entries for the L(shl_N_bwd) handlers, N = 0..15.
   Each .int holds the 32-bit distance from the table base to the
   handler, because JMPTBL (I, B) expands to I - B.  Entry order is
   significant: slot N must name L(shl_N_bwd).  The table starts on an
   8-byte boundary.
   NOTE(review): the L(shl_*_bwd) handlers and the dispatch site are
   outside this chunk; the index is presumably a 0..15 relative
   misalignment of the two buffers -- confirm.  */
3159 L(shl_table_bwd):
3160 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3161 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3162 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3163 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3164 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3165 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3166 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3167 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3168 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3169 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3170 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3171 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3172 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3173 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3174 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3175 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3176
3177 #endif