]> git.ipfire.org Git - thirdparty/glibc.git/blob - sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
Update copyright dates with scripts/update-copyrights.
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / memcpy-ssse3-back.S
1 /* memcpy with SSSE3 and REP string
2 Copyright (C) 2010-2017 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20 #include <sysdep.h>
21
22 #if IS_IN (libc) \
23 && (defined SHARED \
24 || defined USE_AS_MEMMOVE \
25 || !defined USE_MULTIARCH)
26
27 #include "asm-syntax.h"
28
/* Default symbol names for the entry points below.  They apply only
   when MEMCPY has not already been defined before this point.  */
29 #ifndef MEMCPY
30 # define MEMCPY __memcpy_ssse3_back
31 # define MEMCPY_CHK __memcpy_chk_ssse3_back
32 # define MEMPCPY __mempcpy_ssse3_back
33 # define MEMPCPY_CHK __mempcpy_chk_ssse3_back
34 #endif
35
/* A jump-table entry: the offset of label I from the table base B.  */
36 #define JMPTBL(I, B) I - B
37
38 /* Branch to an entry in a jump table. TABLE is a jump table with
39 relative offsets. INDEX is a register containing the index into the
40 jump table. SCALE is the scale of INDEX.
   The address of TABLE is loaded into %r11, the 32-bit entry at
   TABLE + INDEX * SCALE is sign-extended and added to it, and control
   jumps to the result.  Clobbers %r11 and INDEX.  The ud2 placed after
   the indirect jump is not reached by that jump. */
41 #define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
42 lea TABLE(%rip), %r11; \
43 movslq (%r11, INDEX, SCALE), INDEX; \
44 lea (%r11, INDEX), INDEX; \
45 jmp *INDEX; \
46 ud2
47
48 .section .text.ssse3,"ax",@progbits
49 #if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry: branch to __chk_fail when %rcx is below %rdx
   (unsigned compare).  There is no return instruction here, so in the
   passing case execution continues into whatever is assembled right
   after this ENTRY/END pair.
   NOTE(review): presumably %rcx is the destination object size and
   %rdx the copy length -- confirm against the callers.  */
50 ENTRY (MEMPCPY_CHK)
51 cmpq %rdx, %rcx
52 jb HIDDEN_JUMPTARGET (__chk_fail)
53 END (MEMPCPY_CHK)
54
/* mempcpy entry: the return value in %rax is %rdi + %rdx, i.e. the
   address just past the last destination byte when %rdi is the
   destination and %rdx the count.  The copy itself is done by the
   shared code at L(start).  */
55 ENTRY (MEMPCPY)
56 movq %rdi, %rax
57 addq %rdx, %rax
58 jmp L(start)
59 END (MEMPCPY)
60 #endif
61
62 #if !defined USE_AS_BCOPY
/* Checked entry: branch to __chk_fail when %rcx is below %rdx
   (unsigned compare).  There is no return instruction here, so in the
   passing case execution continues into whatever is assembled right
   after this ENTRY/END pair.
   NOTE(review): presumably %rcx is the destination object size and
   %rdx the copy length -- confirm against the callers.  */
63 ENTRY (MEMCPY_CHK)
64 cmpq %rdx, %rcx
65 jb HIDDEN_JUMPTARGET (__chk_fail)
66 END (MEMCPY_CHK)
67 #endif
68
/* Main entry.  %rdi = destination, %rsi = source, %rdx = byte count.
   %rax receives the return value first; the rest of this block only
   chooses a copy direction and a size class.  */
69 ENTRY (MEMCPY)
70 mov %rdi, %rax
71 #ifdef USE_AS_MEMPCPY
/* The mempcpy build returns destination + count instead.  */
72 add %rdx, %rax
73 #endif
74
75 #ifdef USE_AS_MEMMOVE
/* memmove build: a destination below the source is copied forward,
   equal pointers go to L(bwd_write_0bytes), anything else is copied
   backward -- through L(copy_backward) for 144 bytes or more, or
   directly through the backward tail table indexed by the count.  */
76 cmp %rsi, %rdi
77 jb L(copy_forward)
78 je L(bwd_write_0bytes)
79 cmp $144, %rdx
80 jae L(copy_backward)
81 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
82 L(copy_forward):
83 #endif
/* L(start) is also the join point used by the MEMPCPY entry.  */
84 L(start):
85 cmp $144, %rdx
86 jae L(144bytesormore)
87
/* Reached for counts below 144 (the "less32bytes" in the label name
   does not match the compare above).  Outside the memmove build the
   low address bytes pick the direction: source byte below-or-equal
   (unsigned) to destination byte takes L(bk_write).  */
88 L(fwd_write_less32bytes):
89 #ifndef USE_AS_MEMMOVE
90 cmp %dil, %sil
91 jbe L(bk_write)
92 #endif
/* Forward tail: both pointers are advanced to the end of the data
   before the table entry for this count is entered.  */
93 add %rdx, %rsi
94 add %rdx, %rdi
95 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
96 #ifndef USE_AS_MEMMOVE
/* Backward tail: pointers are left at the start of the data.  */
97 L(bk_write):
98
99 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
100 #endif
101
102 .p2align 4
/* Forward path for counts of 144 bytes or more.  The first 16 source
   bytes are saved in %xmm0 and the original destination in %r8; %rdi
   is then rounded up to the next 16-byte boundary (a full 16 bytes
   when it is already aligned) and %rsi / %rdx are moved by the same
   1..16 bytes.  The copy loops store %xmm0 to (%r8) when they finish,
   which fills in the skipped head.  */
103 L(144bytesormore):
104
105 #ifndef USE_AS_MEMMOVE
/* NOTE(review): this is a signed compare (jle) of the low address
   bytes, whereas L(fwd_write_less32bytes) uses the unsigned jbe on the
   same operands -- confirm the difference is intended.  */
106 cmp %dil, %sil
107 jle L(copy_backward)
108 #endif
109 movdqu (%rsi), %xmm0
110 mov %rdi, %r8
111 and $-16, %rdi
112 add $16, %rdi
/* %r9 = number of head bytes skipped by the alignment step.  */
113 mov %rdi, %r9
114 sub %r8, %r9
115 sub %r9, %rdx
116 add %r9, %rsi
/* %r9 = source offset within its 16-byte line; zero means source and
   destination are now both aligned.  */
117 mov %rsi, %r9
118 and $0xf, %r9
119 jz L(shl_0)
/* Counts at or above the data cache size go to L(gobble_mem_fwd).  */
120 #ifdef DATA_CACHE_SIZE
121 mov $DATA_CACHE_SIZE, %RCX_LP
122 #else
123 mov __x86_data_cache_size(%rip), %RCX_LP
124 #endif
125 cmp %rcx, %rdx
126 jae L(gobble_mem_fwd)
/* Otherwise jump to the L(shl_table_fwd) entry selected by %r9, with
   %rdx already reduced by 128.  */
127 lea L(shl_table_fwd)(%rip), %r11
128 sub $0x80, %rdx
129 movslq (%r11, %r9, 4), %r9
130 add %r11, %r9
131 jmp *%r9
132 ud2
133
134 .p2align 4
/* Backward path.  Counts above twice the data cache size go to
   L(gobble_mem_bwd).  Otherwise both pointers are moved to the end of
   the data, the last 16 source bytes are saved in %xmm0 and the
   address they belong at in %r8, and %rdi is rounded down to a 16-byte
   boundary with %rsi / %rdx reduced by the same 0..15 bytes.  */
135 L(copy_backward):
136 #ifdef DATA_CACHE_SIZE
137 mov $DATA_CACHE_SIZE, %RCX_LP
138 #else
139 mov __x86_data_cache_size(%rip), %RCX_LP
140 #endif
141 shl $1, %rcx
142 cmp %rcx, %rdx
143 ja L(gobble_mem_bwd)
144
145 add %rdx, %rdi
146 add %rdx, %rsi
147 movdqu -16(%rsi), %xmm0
148 lea -16(%rdi), %r8
/* %r9 = low four bits of the destination end; the xor clears exactly
   those bits, aligning %rdi downwards.  */
149 mov %rdi, %r9
150 and $0xf, %r9
151 xor %r9, %rdi
152 sub %r9, %rsi
153 sub %r9, %rdx
/* %r9 = source offset within its 16-byte line; zero means both ends
   are aligned.  */
154 mov %rsi, %r9
155 and $0xf, %r9
156 jz L(shl_0_bwd)
/* Otherwise jump to the L(shl_table_bwd) entry selected by %r9, with
   %rdx already reduced by 128.  */
157 lea L(shl_table_bwd)(%rip), %r11
158 sub $0x80, %rdx
159 movslq (%r11, %r9, 4), %r9
160 add %r11, %r9
161 jmp *%r9
162 ud2
163
164 .p2align 4
/* Forward copy with source and destination both 16-byte aligned (the
   movdqa/movaps accesses below fault otherwise).  When
   %rdx + (%rdx >> 8) reaches half the data cache size the copy is
   handed to L(gobble_mem_fwd); otherwise the 128-byte loop runs.  */
165 L(shl_0):
166
167 mov %rdx, %r9
168 shr $8, %r9
169 add %rdx, %r9
170 #ifdef DATA_CACHE_SIZE
171 cmp $DATA_CACHE_SIZE_HALF, %R9_LP
172 #else
173 cmp __x86_data_cache_size_half(%rip), %R9_LP
174 #endif
175 jae L(gobble_mem_fwd)
176 sub $0x80, %rdx
177 .p2align 4
/* Copies 128 bytes per pass.  The sub sets CF; the two lea
   instructions do not write flags, so jae loops while the subtraction
   did not borrow.  */
178 L(shl_0_loop):
179 movdqa (%rsi), %xmm1
180 movdqa %xmm1, (%rdi)
181 movaps 0x10(%rsi), %xmm2
182 movaps %xmm2, 0x10(%rdi)
183 movaps 0x20(%rsi), %xmm3
184 movaps %xmm3, 0x20(%rdi)
185 movaps 0x30(%rsi), %xmm4
186 movaps %xmm4, 0x30(%rdi)
187 movaps 0x40(%rsi), %xmm1
188 movaps %xmm1, 0x40(%rdi)
189 movaps 0x50(%rsi), %xmm2
190 movaps %xmm2, 0x50(%rdi)
191 movaps 0x60(%rsi), %xmm3
192 movaps %xmm3, 0x60(%rdi)
193 movaps 0x70(%rsi), %xmm4
194 movaps %xmm4, 0x70(%rdi)
195 sub $0x80, %rdx
196 lea 0x80(%rsi), %rsi
197 lea 0x80(%rdi), %rdi
198 jae L(shl_0_loop)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction to get the leftover count, advance both pointers to the
   end of the data and dispatch on the leftover count.  */
199 movdqu %xmm0, (%r8)
200 add $0x80, %rdx
201 add %rdx, %rsi
202 add %rdx, %rdi
203 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
204
205 .p2align 4
/* Backward copy with source and destination ends both 16-byte aligned
   (the movaps accesses below fault otherwise).  %rdx is reduced by 128
   once on entry, then again after each 128-byte pass.  */
206 L(shl_0_bwd):
207 sub $0x80, %rdx
/* Copies the 128 bytes just below %rsi to just below %rdi.  The sub
   sets CF; lea does not write flags, so jae loops while the
   subtraction did not borrow.  */
208 L(copy_backward_loop):
209 movaps -0x10(%rsi), %xmm1
210 movaps %xmm1, -0x10(%rdi)
211 movaps -0x20(%rsi), %xmm2
212 movaps %xmm2, -0x20(%rdi)
213 movaps -0x30(%rsi), %xmm3
214 movaps %xmm3, -0x30(%rdi)
215 movaps -0x40(%rsi), %xmm4
216 movaps %xmm4, -0x40(%rdi)
217 movaps -0x50(%rsi), %xmm5
218 movaps %xmm5, -0x50(%rdi)
219 movaps -0x60(%rsi), %xmm5
220 movaps %xmm5, -0x60(%rdi)
221 movaps -0x70(%rsi), %xmm5
222 movaps %xmm5, -0x70(%rdi)
223 movaps -0x80(%rsi), %xmm5
224 movaps %xmm5, -0x80(%rdi)
225 sub $0x80, %rdx
226 lea -0x80(%rdi), %rdi
227 lea -0x80(%rsi), %rsi
228 jae L(copy_backward_loop)
229
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction to get the leftover count, move both pointers down to
   the start of the leftover bytes and dispatch on their count.  */
230 movdqu %xmm0, (%r8)
231 add $0x80, %rdx
232 sub %rdx, %rdi
233 sub %rdx, %rsi
234 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
235
236 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 1 byte past a
   16-byte boundary: the movaps loads at -0x01(%rsi) etc. fault unless
   (%rsi & 15) == 1.  Nine aligned loads are merged pairwise by
   palignr $1 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
237 L(shl_1):
238 sub $0x80, %rdx
239 movaps -0x01(%rsi), %xmm1
240 movaps 0x0f(%rsi), %xmm2
241 movaps 0x1f(%rsi), %xmm3
242 movaps 0x2f(%rsi), %xmm4
243 movaps 0x3f(%rsi), %xmm5
244 movaps 0x4f(%rsi), %xmm6
245 movaps 0x5f(%rsi), %xmm7
246 movaps 0x6f(%rsi), %xmm8
247 movaps 0x7f(%rsi), %xmm9
248 lea 0x80(%rsi), %rsi
249 palignr $1, %xmm8, %xmm9
250 movaps %xmm9, 0x70(%rdi)
251 palignr $1, %xmm7, %xmm8
252 movaps %xmm8, 0x60(%rdi)
253 palignr $1, %xmm6, %xmm7
254 movaps %xmm7, 0x50(%rdi)
255 palignr $1, %xmm5, %xmm6
256 movaps %xmm6, 0x40(%rdi)
257 palignr $1, %xmm4, %xmm5
258 movaps %xmm5, 0x30(%rdi)
259 palignr $1, %xmm3, %xmm4
260 movaps %xmm4, 0x20(%rdi)
261 palignr $1, %xmm2, %xmm3
262 movaps %xmm3, 0x10(%rdi)
263 palignr $1, %xmm1, %xmm2
264 movaps %xmm2, (%rdi)
265 lea 0x80(%rdi), %rdi
266 jae L(shl_1)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
267 movdqu %xmm0, (%r8)
268 add $0x80, %rdx
269 add %rdx, %rdi
270 add %rdx, %rsi
271 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
272
273 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 1 byte past
   a 16-byte boundary: the movaps loads at -0x01(%rsi) etc. fault
   unless (%rsi & 15) == 1.  Each palignr $1 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
274 L(shl_1_bwd):
275 movaps -0x01(%rsi), %xmm1
276
277 movaps -0x11(%rsi), %xmm2
278 palignr $1, %xmm2, %xmm1
279 movaps %xmm1, -0x10(%rdi)
280
281 movaps -0x21(%rsi), %xmm3
282 palignr $1, %xmm3, %xmm2
283 movaps %xmm2, -0x20(%rdi)
284
285 movaps -0x31(%rsi), %xmm4
286 palignr $1, %xmm4, %xmm3
287 movaps %xmm3, -0x30(%rdi)
288
289 movaps -0x41(%rsi), %xmm5
290 palignr $1, %xmm5, %xmm4
291 movaps %xmm4, -0x40(%rdi)
292
293 movaps -0x51(%rsi), %xmm6
294 palignr $1, %xmm6, %xmm5
295 movaps %xmm5, -0x50(%rdi)
296
297 movaps -0x61(%rsi), %xmm7
298 palignr $1, %xmm7, %xmm6
299 movaps %xmm6, -0x60(%rdi)
300
301 movaps -0x71(%rsi), %xmm8
302 palignr $1, %xmm8, %xmm7
303 movaps %xmm7, -0x70(%rdi)
304
305 movaps -0x81(%rsi), %xmm9
306 palignr $1, %xmm9, %xmm8
307 movaps %xmm8, -0x80(%rdi)
308
309 sub $0x80, %rdx
310 lea -0x80(%rdi), %rdi
311 lea -0x80(%rsi), %rsi
312 jae L(shl_1_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
313 movdqu %xmm0, (%r8)
314 add $0x80, %rdx
315 sub %rdx, %rdi
316 sub %rdx, %rsi
317 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
318
319 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 2 bytes past a
   16-byte boundary: the movaps loads at -0x02(%rsi) etc. fault unless
   (%rsi & 15) == 2.  Nine aligned loads are merged pairwise by
   palignr $2 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
320 L(shl_2):
321 sub $0x80, %rdx
322 movaps -0x02(%rsi), %xmm1
323 movaps 0x0e(%rsi), %xmm2
324 movaps 0x1e(%rsi), %xmm3
325 movaps 0x2e(%rsi), %xmm4
326 movaps 0x3e(%rsi), %xmm5
327 movaps 0x4e(%rsi), %xmm6
328 movaps 0x5e(%rsi), %xmm7
329 movaps 0x6e(%rsi), %xmm8
330 movaps 0x7e(%rsi), %xmm9
331 lea 0x80(%rsi), %rsi
332 palignr $2, %xmm8, %xmm9
333 movaps %xmm9, 0x70(%rdi)
334 palignr $2, %xmm7, %xmm8
335 movaps %xmm8, 0x60(%rdi)
336 palignr $2, %xmm6, %xmm7
337 movaps %xmm7, 0x50(%rdi)
338 palignr $2, %xmm5, %xmm6
339 movaps %xmm6, 0x40(%rdi)
340 palignr $2, %xmm4, %xmm5
341 movaps %xmm5, 0x30(%rdi)
342 palignr $2, %xmm3, %xmm4
343 movaps %xmm4, 0x20(%rdi)
344 palignr $2, %xmm2, %xmm3
345 movaps %xmm3, 0x10(%rdi)
346 palignr $2, %xmm1, %xmm2
347 movaps %xmm2, (%rdi)
348 lea 0x80(%rdi), %rdi
349 jae L(shl_2)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
350 movdqu %xmm0, (%r8)
351 add $0x80, %rdx
352 add %rdx, %rdi
353 add %rdx, %rsi
354 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
355
356 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 2 bytes
   past a 16-byte boundary: the movaps loads at -0x02(%rsi) etc. fault
   unless (%rsi & 15) == 2.  Each palignr $2 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
357 L(shl_2_bwd):
358 movaps -0x02(%rsi), %xmm1
359
360 movaps -0x12(%rsi), %xmm2
361 palignr $2, %xmm2, %xmm1
362 movaps %xmm1, -0x10(%rdi)
363
364 movaps -0x22(%rsi), %xmm3
365 palignr $2, %xmm3, %xmm2
366 movaps %xmm2, -0x20(%rdi)
367
368 movaps -0x32(%rsi), %xmm4
369 palignr $2, %xmm4, %xmm3
370 movaps %xmm3, -0x30(%rdi)
371
372 movaps -0x42(%rsi), %xmm5
373 palignr $2, %xmm5, %xmm4
374 movaps %xmm4, -0x40(%rdi)
375
376 movaps -0x52(%rsi), %xmm6
377 palignr $2, %xmm6, %xmm5
378 movaps %xmm5, -0x50(%rdi)
379
380 movaps -0x62(%rsi), %xmm7
381 palignr $2, %xmm7, %xmm6
382 movaps %xmm6, -0x60(%rdi)
383
384 movaps -0x72(%rsi), %xmm8
385 palignr $2, %xmm8, %xmm7
386 movaps %xmm7, -0x70(%rdi)
387
388 movaps -0x82(%rsi), %xmm9
389 palignr $2, %xmm9, %xmm8
390 movaps %xmm8, -0x80(%rdi)
391
392 sub $0x80, %rdx
393 lea -0x80(%rdi), %rdi
394 lea -0x80(%rsi), %rsi
395 jae L(shl_2_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
396 movdqu %xmm0, (%r8)
397 add $0x80, %rdx
398 sub %rdx, %rdi
399 sub %rdx, %rsi
400 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
401
402 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 3 bytes past a
   16-byte boundary: the movaps loads at -0x03(%rsi) etc. fault unless
   (%rsi & 15) == 3.  Nine aligned loads are merged pairwise by
   palignr $3 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
403 L(shl_3):
404 sub $0x80, %rdx
405 movaps -0x03(%rsi), %xmm1
406 movaps 0x0d(%rsi), %xmm2
407 movaps 0x1d(%rsi), %xmm3
408 movaps 0x2d(%rsi), %xmm4
409 movaps 0x3d(%rsi), %xmm5
410 movaps 0x4d(%rsi), %xmm6
411 movaps 0x5d(%rsi), %xmm7
412 movaps 0x6d(%rsi), %xmm8
413 movaps 0x7d(%rsi), %xmm9
414 lea 0x80(%rsi), %rsi
415 palignr $3, %xmm8, %xmm9
416 movaps %xmm9, 0x70(%rdi)
417 palignr $3, %xmm7, %xmm8
418 movaps %xmm8, 0x60(%rdi)
419 palignr $3, %xmm6, %xmm7
420 movaps %xmm7, 0x50(%rdi)
421 palignr $3, %xmm5, %xmm6
422 movaps %xmm6, 0x40(%rdi)
423 palignr $3, %xmm4, %xmm5
424 movaps %xmm5, 0x30(%rdi)
425 palignr $3, %xmm3, %xmm4
426 movaps %xmm4, 0x20(%rdi)
427 palignr $3, %xmm2, %xmm3
428 movaps %xmm3, 0x10(%rdi)
429 palignr $3, %xmm1, %xmm2
430 movaps %xmm2, (%rdi)
431 lea 0x80(%rdi), %rdi
432 jae L(shl_3)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
433 movdqu %xmm0, (%r8)
434 add $0x80, %rdx
435 add %rdx, %rdi
436 add %rdx, %rsi
437 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
438
439 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 3 bytes
   past a 16-byte boundary: the movaps loads at -0x03(%rsi) etc. fault
   unless (%rsi & 15) == 3.  Each palignr $3 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
440 L(shl_3_bwd):
441 movaps -0x03(%rsi), %xmm1
442
443 movaps -0x13(%rsi), %xmm2
444 palignr $3, %xmm2, %xmm1
445 movaps %xmm1, -0x10(%rdi)
446
447 movaps -0x23(%rsi), %xmm3
448 palignr $3, %xmm3, %xmm2
449 movaps %xmm2, -0x20(%rdi)
450
451 movaps -0x33(%rsi), %xmm4
452 palignr $3, %xmm4, %xmm3
453 movaps %xmm3, -0x30(%rdi)
454
455 movaps -0x43(%rsi), %xmm5
456 palignr $3, %xmm5, %xmm4
457 movaps %xmm4, -0x40(%rdi)
458
459 movaps -0x53(%rsi), %xmm6
460 palignr $3, %xmm6, %xmm5
461 movaps %xmm5, -0x50(%rdi)
462
463 movaps -0x63(%rsi), %xmm7
464 palignr $3, %xmm7, %xmm6
465 movaps %xmm6, -0x60(%rdi)
466
467 movaps -0x73(%rsi), %xmm8
468 palignr $3, %xmm8, %xmm7
469 movaps %xmm7, -0x70(%rdi)
470
471 movaps -0x83(%rsi), %xmm9
472 palignr $3, %xmm9, %xmm8
473 movaps %xmm8, -0x80(%rdi)
474
475 sub $0x80, %rdx
476 lea -0x80(%rdi), %rdi
477 lea -0x80(%rsi), %rsi
478 jae L(shl_3_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
479 movdqu %xmm0, (%r8)
480 add $0x80, %rdx
481 sub %rdx, %rdi
482 sub %rdx, %rsi
483 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
484
485 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 4 bytes past a
   16-byte boundary: the movaps loads at -0x04(%rsi) etc. fault unless
   (%rsi & 15) == 4.  Nine aligned loads are merged pairwise by
   palignr $4 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
486 L(shl_4):
487 sub $0x80, %rdx
488 movaps -0x04(%rsi), %xmm1
489 movaps 0x0c(%rsi), %xmm2
490 movaps 0x1c(%rsi), %xmm3
491 movaps 0x2c(%rsi), %xmm4
492 movaps 0x3c(%rsi), %xmm5
493 movaps 0x4c(%rsi), %xmm6
494 movaps 0x5c(%rsi), %xmm7
495 movaps 0x6c(%rsi), %xmm8
496 movaps 0x7c(%rsi), %xmm9
497 lea 0x80(%rsi), %rsi
498 palignr $4, %xmm8, %xmm9
499 movaps %xmm9, 0x70(%rdi)
500 palignr $4, %xmm7, %xmm8
501 movaps %xmm8, 0x60(%rdi)
502 palignr $4, %xmm6, %xmm7
503 movaps %xmm7, 0x50(%rdi)
504 palignr $4, %xmm5, %xmm6
505 movaps %xmm6, 0x40(%rdi)
506 palignr $4, %xmm4, %xmm5
507 movaps %xmm5, 0x30(%rdi)
508 palignr $4, %xmm3, %xmm4
509 movaps %xmm4, 0x20(%rdi)
510 palignr $4, %xmm2, %xmm3
511 movaps %xmm3, 0x10(%rdi)
512 palignr $4, %xmm1, %xmm2
513 movaps %xmm2, (%rdi)
514 lea 0x80(%rdi), %rdi
515 jae L(shl_4)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
516 movdqu %xmm0, (%r8)
517 add $0x80, %rdx
518 add %rdx, %rdi
519 add %rdx, %rsi
520 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
521
522 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 4 bytes
   past a 16-byte boundary: the movaps loads at -0x04(%rsi) etc. fault
   unless (%rsi & 15) == 4.  Each palignr $4 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
523 L(shl_4_bwd):
524 movaps -0x04(%rsi), %xmm1
525
526 movaps -0x14(%rsi), %xmm2
527 palignr $4, %xmm2, %xmm1
528 movaps %xmm1, -0x10(%rdi)
529
530 movaps -0x24(%rsi), %xmm3
531 palignr $4, %xmm3, %xmm2
532 movaps %xmm2, -0x20(%rdi)
533
534 movaps -0x34(%rsi), %xmm4
535 palignr $4, %xmm4, %xmm3
536 movaps %xmm3, -0x30(%rdi)
537
538 movaps -0x44(%rsi), %xmm5
539 palignr $4, %xmm5, %xmm4
540 movaps %xmm4, -0x40(%rdi)
541
542 movaps -0x54(%rsi), %xmm6
543 palignr $4, %xmm6, %xmm5
544 movaps %xmm5, -0x50(%rdi)
545
546 movaps -0x64(%rsi), %xmm7
547 palignr $4, %xmm7, %xmm6
548 movaps %xmm6, -0x60(%rdi)
549
550 movaps -0x74(%rsi), %xmm8
551 palignr $4, %xmm8, %xmm7
552 movaps %xmm7, -0x70(%rdi)
553
554 movaps -0x84(%rsi), %xmm9
555 palignr $4, %xmm9, %xmm8
556 movaps %xmm8, -0x80(%rdi)
557
558 sub $0x80, %rdx
559 lea -0x80(%rdi), %rdi
560 lea -0x80(%rsi), %rsi
561 jae L(shl_4_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
562 movdqu %xmm0, (%r8)
563 add $0x80, %rdx
564 sub %rdx, %rdi
565 sub %rdx, %rsi
566 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
567
568 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 5 bytes past a
   16-byte boundary: the movaps loads at -0x05(%rsi) etc. fault unless
   (%rsi & 15) == 5.  Nine aligned loads are merged pairwise by
   palignr $5 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
569 L(shl_5):
570 sub $0x80, %rdx
571 movaps -0x05(%rsi), %xmm1
572 movaps 0x0b(%rsi), %xmm2
573 movaps 0x1b(%rsi), %xmm3
574 movaps 0x2b(%rsi), %xmm4
575 movaps 0x3b(%rsi), %xmm5
576 movaps 0x4b(%rsi), %xmm6
577 movaps 0x5b(%rsi), %xmm7
578 movaps 0x6b(%rsi), %xmm8
579 movaps 0x7b(%rsi), %xmm9
580 lea 0x80(%rsi), %rsi
581 palignr $5, %xmm8, %xmm9
582 movaps %xmm9, 0x70(%rdi)
583 palignr $5, %xmm7, %xmm8
584 movaps %xmm8, 0x60(%rdi)
585 palignr $5, %xmm6, %xmm7
586 movaps %xmm7, 0x50(%rdi)
587 palignr $5, %xmm5, %xmm6
588 movaps %xmm6, 0x40(%rdi)
589 palignr $5, %xmm4, %xmm5
590 movaps %xmm5, 0x30(%rdi)
591 palignr $5, %xmm3, %xmm4
592 movaps %xmm4, 0x20(%rdi)
593 palignr $5, %xmm2, %xmm3
594 movaps %xmm3, 0x10(%rdi)
595 palignr $5, %xmm1, %xmm2
596 movaps %xmm2, (%rdi)
597 lea 0x80(%rdi), %rdi
598 jae L(shl_5)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
599 movdqu %xmm0, (%r8)
600 add $0x80, %rdx
601 add %rdx, %rdi
602 add %rdx, %rsi
603 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
604
605 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 5 bytes
   past a 16-byte boundary: the movaps loads at -0x05(%rsi) etc. fault
   unless (%rsi & 15) == 5.  Each palignr $5 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
606 L(shl_5_bwd):
607 movaps -0x05(%rsi), %xmm1
608
609 movaps -0x15(%rsi), %xmm2
610 palignr $5, %xmm2, %xmm1
611 movaps %xmm1, -0x10(%rdi)
612
613 movaps -0x25(%rsi), %xmm3
614 palignr $5, %xmm3, %xmm2
615 movaps %xmm2, -0x20(%rdi)
616
617 movaps -0x35(%rsi), %xmm4
618 palignr $5, %xmm4, %xmm3
619 movaps %xmm3, -0x30(%rdi)
620
621 movaps -0x45(%rsi), %xmm5
622 palignr $5, %xmm5, %xmm4
623 movaps %xmm4, -0x40(%rdi)
624
625 movaps -0x55(%rsi), %xmm6
626 palignr $5, %xmm6, %xmm5
627 movaps %xmm5, -0x50(%rdi)
628
629 movaps -0x65(%rsi), %xmm7
630 palignr $5, %xmm7, %xmm6
631 movaps %xmm6, -0x60(%rdi)
632
633 movaps -0x75(%rsi), %xmm8
634 palignr $5, %xmm8, %xmm7
635 movaps %xmm7, -0x70(%rdi)
636
637 movaps -0x85(%rsi), %xmm9
638 palignr $5, %xmm9, %xmm8
639 movaps %xmm8, -0x80(%rdi)
640
641 sub $0x80, %rdx
642 lea -0x80(%rdi), %rdi
643 lea -0x80(%rsi), %rsi
644 jae L(shl_5_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
645 movdqu %xmm0, (%r8)
646 add $0x80, %rdx
647 sub %rdx, %rdi
648 sub %rdx, %rsi
649 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
650
651 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 6 bytes past a
   16-byte boundary: the movaps loads at -0x06(%rsi) etc. fault unless
   (%rsi & 15) == 6.  Nine aligned loads are merged pairwise by
   palignr $6 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
652 L(shl_6):
653 sub $0x80, %rdx
654 movaps -0x06(%rsi), %xmm1
655 movaps 0x0a(%rsi), %xmm2
656 movaps 0x1a(%rsi), %xmm3
657 movaps 0x2a(%rsi), %xmm4
658 movaps 0x3a(%rsi), %xmm5
659 movaps 0x4a(%rsi), %xmm6
660 movaps 0x5a(%rsi), %xmm7
661 movaps 0x6a(%rsi), %xmm8
662 movaps 0x7a(%rsi), %xmm9
663 lea 0x80(%rsi), %rsi
664 palignr $6, %xmm8, %xmm9
665 movaps %xmm9, 0x70(%rdi)
666 palignr $6, %xmm7, %xmm8
667 movaps %xmm8, 0x60(%rdi)
668 palignr $6, %xmm6, %xmm7
669 movaps %xmm7, 0x50(%rdi)
670 palignr $6, %xmm5, %xmm6
671 movaps %xmm6, 0x40(%rdi)
672 palignr $6, %xmm4, %xmm5
673 movaps %xmm5, 0x30(%rdi)
674 palignr $6, %xmm3, %xmm4
675 movaps %xmm4, 0x20(%rdi)
676 palignr $6, %xmm2, %xmm3
677 movaps %xmm3, 0x10(%rdi)
678 palignr $6, %xmm1, %xmm2
679 movaps %xmm2, (%rdi)
680 lea 0x80(%rdi), %rdi
681 jae L(shl_6)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
682 movdqu %xmm0, (%r8)
683 add $0x80, %rdx
684 add %rdx, %rdi
685 add %rdx, %rsi
686 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
687
688 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 6 bytes
   past a 16-byte boundary: the movaps loads at -0x06(%rsi) etc. fault
   unless (%rsi & 15) == 6.  Each palignr $6 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
689 L(shl_6_bwd):
690 movaps -0x06(%rsi), %xmm1
691
692 movaps -0x16(%rsi), %xmm2
693 palignr $6, %xmm2, %xmm1
694 movaps %xmm1, -0x10(%rdi)
695
696 movaps -0x26(%rsi), %xmm3
697 palignr $6, %xmm3, %xmm2
698 movaps %xmm2, -0x20(%rdi)
699
700 movaps -0x36(%rsi), %xmm4
701 palignr $6, %xmm4, %xmm3
702 movaps %xmm3, -0x30(%rdi)
703
704 movaps -0x46(%rsi), %xmm5
705 palignr $6, %xmm5, %xmm4
706 movaps %xmm4, -0x40(%rdi)
707
708 movaps -0x56(%rsi), %xmm6
709 palignr $6, %xmm6, %xmm5
710 movaps %xmm5, -0x50(%rdi)
711
712 movaps -0x66(%rsi), %xmm7
713 palignr $6, %xmm7, %xmm6
714 movaps %xmm6, -0x60(%rdi)
715
716 movaps -0x76(%rsi), %xmm8
717 palignr $6, %xmm8, %xmm7
718 movaps %xmm7, -0x70(%rdi)
719
720 movaps -0x86(%rsi), %xmm9
721 palignr $6, %xmm9, %xmm8
722 movaps %xmm8, -0x80(%rdi)
723
724 sub $0x80, %rdx
725 lea -0x80(%rdi), %rdi
726 lea -0x80(%rsi), %rsi
727 jae L(shl_6_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
728 movdqu %xmm0, (%r8)
729 add $0x80, %rdx
730 sub %rdx, %rdi
731 sub %rdx, %rsi
732 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
733
734 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 7 bytes past a
   16-byte boundary: the movaps loads at -0x07(%rsi) etc. fault unless
   (%rsi & 15) == 7.  Nine aligned loads are merged pairwise by
   palignr $7 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
735 L(shl_7):
736 sub $0x80, %rdx
737 movaps -0x07(%rsi), %xmm1
738 movaps 0x09(%rsi), %xmm2
739 movaps 0x19(%rsi), %xmm3
740 movaps 0x29(%rsi), %xmm4
741 movaps 0x39(%rsi), %xmm5
742 movaps 0x49(%rsi), %xmm6
743 movaps 0x59(%rsi), %xmm7
744 movaps 0x69(%rsi), %xmm8
745 movaps 0x79(%rsi), %xmm9
746 lea 0x80(%rsi), %rsi
747 palignr $7, %xmm8, %xmm9
748 movaps %xmm9, 0x70(%rdi)
749 palignr $7, %xmm7, %xmm8
750 movaps %xmm8, 0x60(%rdi)
751 palignr $7, %xmm6, %xmm7
752 movaps %xmm7, 0x50(%rdi)
753 palignr $7, %xmm5, %xmm6
754 movaps %xmm6, 0x40(%rdi)
755 palignr $7, %xmm4, %xmm5
756 movaps %xmm5, 0x30(%rdi)
757 palignr $7, %xmm3, %xmm4
758 movaps %xmm4, 0x20(%rdi)
759 palignr $7, %xmm2, %xmm3
760 movaps %xmm3, 0x10(%rdi)
761 palignr $7, %xmm1, %xmm2
762 movaps %xmm2, (%rdi)
763 lea 0x80(%rdi), %rdi
764 jae L(shl_7)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
765 movdqu %xmm0, (%r8)
766 add $0x80, %rdx
767 add %rdx, %rdi
768 add %rdx, %rsi
769 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
770
771 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 7 bytes
   past a 16-byte boundary: the movaps loads at -0x07(%rsi) etc. fault
   unless (%rsi & 15) == 7.  Each palignr $7 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
772 L(shl_7_bwd):
773 movaps -0x07(%rsi), %xmm1
774
775 movaps -0x17(%rsi), %xmm2
776 palignr $7, %xmm2, %xmm1
777 movaps %xmm1, -0x10(%rdi)
778
779 movaps -0x27(%rsi), %xmm3
780 palignr $7, %xmm3, %xmm2
781 movaps %xmm2, -0x20(%rdi)
782
783 movaps -0x37(%rsi), %xmm4
784 palignr $7, %xmm4, %xmm3
785 movaps %xmm3, -0x30(%rdi)
786
787 movaps -0x47(%rsi), %xmm5
788 palignr $7, %xmm5, %xmm4
789 movaps %xmm4, -0x40(%rdi)
790
791 movaps -0x57(%rsi), %xmm6
792 palignr $7, %xmm6, %xmm5
793 movaps %xmm5, -0x50(%rdi)
794
795 movaps -0x67(%rsi), %xmm7
796 palignr $7, %xmm7, %xmm6
797 movaps %xmm6, -0x60(%rdi)
798
799 movaps -0x77(%rsi), %xmm8
800 palignr $7, %xmm8, %xmm7
801 movaps %xmm7, -0x70(%rdi)
802
803 movaps -0x87(%rsi), %xmm9
804 palignr $7, %xmm9, %xmm8
805 movaps %xmm8, -0x80(%rdi)
806
807 sub $0x80, %rdx
808 lea -0x80(%rdi), %rdi
809 lea -0x80(%rsi), %rsi
810 jae L(shl_7_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
811 movdqu %xmm0, (%r8)
812 add $0x80, %rdx
813 sub %rdx, %rdi
814 sub %rdx, %rsi
815 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
816
817 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 8 bytes past a
   16-byte boundary: the movaps loads at -0x08(%rsi) etc. fault unless
   (%rsi & 15) == 8.  Nine aligned loads are merged pairwise by
   palignr $8 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
818 L(shl_8):
819 sub $0x80, %rdx
820 movaps -0x08(%rsi), %xmm1
821 movaps 0x08(%rsi), %xmm2
822 movaps 0x18(%rsi), %xmm3
823 movaps 0x28(%rsi), %xmm4
824 movaps 0x38(%rsi), %xmm5
825 movaps 0x48(%rsi), %xmm6
826 movaps 0x58(%rsi), %xmm7
827 movaps 0x68(%rsi), %xmm8
828 movaps 0x78(%rsi), %xmm9
829 lea 0x80(%rsi), %rsi
830 palignr $8, %xmm8, %xmm9
831 movaps %xmm9, 0x70(%rdi)
832 palignr $8, %xmm7, %xmm8
833 movaps %xmm8, 0x60(%rdi)
834 palignr $8, %xmm6, %xmm7
835 movaps %xmm7, 0x50(%rdi)
836 palignr $8, %xmm5, %xmm6
837 movaps %xmm6, 0x40(%rdi)
838 palignr $8, %xmm4, %xmm5
839 movaps %xmm5, 0x30(%rdi)
840 palignr $8, %xmm3, %xmm4
841 movaps %xmm4, 0x20(%rdi)
842 palignr $8, %xmm2, %xmm3
843 movaps %xmm3, 0x10(%rdi)
844 palignr $8, %xmm1, %xmm2
845 movaps %xmm2, (%rdi)
846 lea 0x80(%rdi), %rdi
847 jae L(shl_8)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
848 movdqu %xmm0, (%r8)
849 add $0x80, %rdx
850 add %rdx, %rdi
851 add %rdx, %rsi
852 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
853
854 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 8 bytes
   past a 16-byte boundary: the movaps loads at -0x08(%rsi) etc. fault
   unless (%rsi & 15) == 8.  Each palignr $8 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
855 L(shl_8_bwd):
856 movaps -0x08(%rsi), %xmm1
857
858 movaps -0x18(%rsi), %xmm2
859 palignr $8, %xmm2, %xmm1
860 movaps %xmm1, -0x10(%rdi)
861
862 movaps -0x28(%rsi), %xmm3
863 palignr $8, %xmm3, %xmm2
864 movaps %xmm2, -0x20(%rdi)
865
866 movaps -0x38(%rsi), %xmm4
867 palignr $8, %xmm4, %xmm3
868 movaps %xmm3, -0x30(%rdi)
869
870 movaps -0x48(%rsi), %xmm5
871 palignr $8, %xmm5, %xmm4
872 movaps %xmm4, -0x40(%rdi)
873
874 movaps -0x58(%rsi), %xmm6
875 palignr $8, %xmm6, %xmm5
876 movaps %xmm5, -0x50(%rdi)
877
878 movaps -0x68(%rsi), %xmm7
879 palignr $8, %xmm7, %xmm6
880 movaps %xmm6, -0x60(%rdi)
881
882 movaps -0x78(%rsi), %xmm8
883 palignr $8, %xmm8, %xmm7
884 movaps %xmm7, -0x70(%rdi)
885
886 movaps -0x88(%rsi), %xmm9
887 palignr $8, %xmm9, %xmm8
888 movaps %xmm8, -0x80(%rdi)
889
890 sub $0x80, %rdx
891 lea -0x80(%rdi), %rdi
892 lea -0x80(%rsi), %rsi
893 jae L(shl_8_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  Only this backward loop labels
   its exit; nothing in the visible part of the file refers to it.  */
894 L(shl_8_end_bwd):
895 movdqu %xmm0, (%r8)
896 add $0x80, %rdx
897 sub %rdx, %rdi
898 sub %rdx, %rsi
899 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
900
901 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 9 bytes past a
   16-byte boundary: the movaps loads at -0x09(%rsi) etc. fault unless
   (%rsi & 15) == 9.  Nine aligned loads are merged pairwise by
   palignr $9 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
902 L(shl_9):
903 sub $0x80, %rdx
904 movaps -0x09(%rsi), %xmm1
905 movaps 0x07(%rsi), %xmm2
906 movaps 0x17(%rsi), %xmm3
907 movaps 0x27(%rsi), %xmm4
908 movaps 0x37(%rsi), %xmm5
909 movaps 0x47(%rsi), %xmm6
910 movaps 0x57(%rsi), %xmm7
911 movaps 0x67(%rsi), %xmm8
912 movaps 0x77(%rsi), %xmm9
913 lea 0x80(%rsi), %rsi
914 palignr $9, %xmm8, %xmm9
915 movaps %xmm9, 0x70(%rdi)
916 palignr $9, %xmm7, %xmm8
917 movaps %xmm8, 0x60(%rdi)
918 palignr $9, %xmm6, %xmm7
919 movaps %xmm7, 0x50(%rdi)
920 palignr $9, %xmm5, %xmm6
921 movaps %xmm6, 0x40(%rdi)
922 palignr $9, %xmm4, %xmm5
923 movaps %xmm5, 0x30(%rdi)
924 palignr $9, %xmm3, %xmm4
925 movaps %xmm4, 0x20(%rdi)
926 palignr $9, %xmm2, %xmm3
927 movaps %xmm3, 0x10(%rdi)
928 palignr $9, %xmm1, %xmm2
929 movaps %xmm2, (%rdi)
930 lea 0x80(%rdi), %rdi
931 jae L(shl_9)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
932 movdqu %xmm0, (%r8)
933 add $0x80, %rdx
934 add %rdx, %rdi
935 add %rdx, %rsi
936 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
937
938 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 9 bytes
   past a 16-byte boundary: the movaps loads at -0x09(%rsi) etc. fault
   unless (%rsi & 15) == 9.  Each palignr $9 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
939 L(shl_9_bwd):
940 movaps -0x09(%rsi), %xmm1
941
942 movaps -0x19(%rsi), %xmm2
943 palignr $9, %xmm2, %xmm1
944 movaps %xmm1, -0x10(%rdi)
945
946 movaps -0x29(%rsi), %xmm3
947 palignr $9, %xmm3, %xmm2
948 movaps %xmm2, -0x20(%rdi)
949
950 movaps -0x39(%rsi), %xmm4
951 palignr $9, %xmm4, %xmm3
952 movaps %xmm3, -0x30(%rdi)
953
954 movaps -0x49(%rsi), %xmm5
955 palignr $9, %xmm5, %xmm4
956 movaps %xmm4, -0x40(%rdi)
957
958 movaps -0x59(%rsi), %xmm6
959 palignr $9, %xmm6, %xmm5
960 movaps %xmm5, -0x50(%rdi)
961
962 movaps -0x69(%rsi), %xmm7
963 palignr $9, %xmm7, %xmm6
964 movaps %xmm6, -0x60(%rdi)
965
966 movaps -0x79(%rsi), %xmm8
967 palignr $9, %xmm8, %xmm7
968 movaps %xmm7, -0x70(%rdi)
969
970 movaps -0x89(%rsi), %xmm9
971 palignr $9, %xmm9, %xmm8
972 movaps %xmm8, -0x80(%rdi)
973
974 sub $0x80, %rdx
975 lea -0x80(%rdi), %rdi
976 lea -0x80(%rsi), %rsi
977 jae L(shl_9_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
978 movdqu %xmm0, (%r8)
979 add $0x80, %rdx
980 sub %rdx, %rdi
981 sub %rdx, %rsi
982 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
983
984 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 10 bytes past a
   16-byte boundary: the movaps loads at -0x0a(%rsi) etc. fault unless
   (%rsi & 15) == 10.  Nine aligned loads are merged pairwise by
   palignr $10 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
985 L(shl_10):
986 sub $0x80, %rdx
987 movaps -0x0a(%rsi), %xmm1
988 movaps 0x06(%rsi), %xmm2
989 movaps 0x16(%rsi), %xmm3
990 movaps 0x26(%rsi), %xmm4
991 movaps 0x36(%rsi), %xmm5
992 movaps 0x46(%rsi), %xmm6
993 movaps 0x56(%rsi), %xmm7
994 movaps 0x66(%rsi), %xmm8
995 movaps 0x76(%rsi), %xmm9
996 lea 0x80(%rsi), %rsi
997 palignr $10, %xmm8, %xmm9
998 movaps %xmm9, 0x70(%rdi)
999 palignr $10, %xmm7, %xmm8
1000 movaps %xmm8, 0x60(%rdi)
1001 palignr $10, %xmm6, %xmm7
1002 movaps %xmm7, 0x50(%rdi)
1003 palignr $10, %xmm5, %xmm6
1004 movaps %xmm6, 0x40(%rdi)
1005 palignr $10, %xmm4, %xmm5
1006 movaps %xmm5, 0x30(%rdi)
1007 palignr $10, %xmm3, %xmm4
1008 movaps %xmm4, 0x20(%rdi)
1009 palignr $10, %xmm2, %xmm3
1010 movaps %xmm3, 0x10(%rdi)
1011 palignr $10, %xmm1, %xmm2
1012 movaps %xmm2, (%rdi)
1013 lea 0x80(%rdi), %rdi
1014 jae L(shl_10)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
1015 movdqu %xmm0, (%r8)
1016 add $0x80, %rdx
1017 add %rdx, %rdi
1018 add %rdx, %rsi
1019 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1020
1021 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 10 bytes
   past a 16-byte boundary: the movaps loads at -0x0a(%rsi) etc. fault
   unless (%rsi & 15) == 10.  Each palignr $10 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
1022 L(shl_10_bwd):
1023 movaps -0x0a(%rsi), %xmm1
1024
1025 movaps -0x1a(%rsi), %xmm2
1026 palignr $10, %xmm2, %xmm1
1027 movaps %xmm1, -0x10(%rdi)
1028
1029 movaps -0x2a(%rsi), %xmm3
1030 palignr $10, %xmm3, %xmm2
1031 movaps %xmm2, -0x20(%rdi)
1032
1033 movaps -0x3a(%rsi), %xmm4
1034 palignr $10, %xmm4, %xmm3
1035 movaps %xmm3, -0x30(%rdi)
1036
1037 movaps -0x4a(%rsi), %xmm5
1038 palignr $10, %xmm5, %xmm4
1039 movaps %xmm4, -0x40(%rdi)
1040
1041 movaps -0x5a(%rsi), %xmm6
1042 palignr $10, %xmm6, %xmm5
1043 movaps %xmm5, -0x50(%rdi)
1044
1045 movaps -0x6a(%rsi), %xmm7
1046 palignr $10, %xmm7, %xmm6
1047 movaps %xmm6, -0x60(%rdi)
1048
1049 movaps -0x7a(%rsi), %xmm8
1050 palignr $10, %xmm8, %xmm7
1051 movaps %xmm7, -0x70(%rdi)
1052
1053 movaps -0x8a(%rsi), %xmm9
1054 palignr $10, %xmm9, %xmm8
1055 movaps %xmm8, -0x80(%rdi)
1056
1057 sub $0x80, %rdx
1058 lea -0x80(%rdi), %rdi
1059 lea -0x80(%rsi), %rsi
1060 jae L(shl_10_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
1061 movdqu %xmm0, (%r8)
1062 add $0x80, %rdx
1063 sub %rdx, %rdi
1064 sub %rdx, %rsi
1065 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1066
1067 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 11 bytes past a
   16-byte boundary: the movaps loads at -0x0b(%rsi) etc. fault unless
   (%rsi & 15) == 11.  Nine aligned loads are merged pairwise by
   palignr $11 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
1068 L(shl_11):
1069 sub $0x80, %rdx
1070 movaps -0x0b(%rsi), %xmm1
1071 movaps 0x05(%rsi), %xmm2
1072 movaps 0x15(%rsi), %xmm3
1073 movaps 0x25(%rsi), %xmm4
1074 movaps 0x35(%rsi), %xmm5
1075 movaps 0x45(%rsi), %xmm6
1076 movaps 0x55(%rsi), %xmm7
1077 movaps 0x65(%rsi), %xmm8
1078 movaps 0x75(%rsi), %xmm9
1079 lea 0x80(%rsi), %rsi
1080 palignr $11, %xmm8, %xmm9
1081 movaps %xmm9, 0x70(%rdi)
1082 palignr $11, %xmm7, %xmm8
1083 movaps %xmm8, 0x60(%rdi)
1084 palignr $11, %xmm6, %xmm7
1085 movaps %xmm7, 0x50(%rdi)
1086 palignr $11, %xmm5, %xmm6
1087 movaps %xmm6, 0x40(%rdi)
1088 palignr $11, %xmm4, %xmm5
1089 movaps %xmm5, 0x30(%rdi)
1090 palignr $11, %xmm3, %xmm4
1091 movaps %xmm4, 0x20(%rdi)
1092 palignr $11, %xmm2, %xmm3
1093 movaps %xmm3, 0x10(%rdi)
1094 palignr $11, %xmm1, %xmm2
1095 movaps %xmm2, (%rdi)
1096 lea 0x80(%rdi), %rdi
1097 jae L(shl_11)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
1098 movdqu %xmm0, (%r8)
1099 add $0x80, %rdx
1100 add %rdx, %rdi
1101 add %rdx, %rsi
1102 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1103
1104 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 11 bytes
   past a 16-byte boundary: the movaps loads at -0x0b(%rsi) etc. fault
   unless (%rsi & 15) == 11.  Each palignr $11 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
1105 L(shl_11_bwd):
1106 movaps -0x0b(%rsi), %xmm1
1107
1108 movaps -0x1b(%rsi), %xmm2
1109 palignr $11, %xmm2, %xmm1
1110 movaps %xmm1, -0x10(%rdi)
1111
1112 movaps -0x2b(%rsi), %xmm3
1113 palignr $11, %xmm3, %xmm2
1114 movaps %xmm2, -0x20(%rdi)
1115
1116 movaps -0x3b(%rsi), %xmm4
1117 palignr $11, %xmm4, %xmm3
1118 movaps %xmm3, -0x30(%rdi)
1119
1120 movaps -0x4b(%rsi), %xmm5
1121 palignr $11, %xmm5, %xmm4
1122 movaps %xmm4, -0x40(%rdi)
1123
1124 movaps -0x5b(%rsi), %xmm6
1125 palignr $11, %xmm6, %xmm5
1126 movaps %xmm5, -0x50(%rdi)
1127
1128 movaps -0x6b(%rsi), %xmm7
1129 palignr $11, %xmm7, %xmm6
1130 movaps %xmm6, -0x60(%rdi)
1131
1132 movaps -0x7b(%rsi), %xmm8
1133 palignr $11, %xmm8, %xmm7
1134 movaps %xmm7, -0x70(%rdi)
1135
1136 movaps -0x8b(%rsi), %xmm9
1137 palignr $11, %xmm9, %xmm8
1138 movaps %xmm8, -0x80(%rdi)
1139
1140 sub $0x80, %rdx
1141 lea -0x80(%rdi), %rdi
1142 lea -0x80(%rsi), %rsi
1143 jae L(shl_11_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
1144 movdqu %xmm0, (%r8)
1145 add $0x80, %rdx
1146 sub %rdx, %rdi
1147 sub %rdx, %rsi
1148 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1149
1150 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 12 bytes past a
   16-byte boundary: the movaps loads at -0x0c(%rsi) etc. fault unless
   (%rsi & 15) == 12.  Nine aligned loads are merged pairwise by
   palignr $12 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
1151 L(shl_12):
1152 sub $0x80, %rdx
/* First load uses movaps like the other forward shl loops; it loads
   the same 16 aligned bytes as movdqa with a shorter encoding.  */
1153 movaps -0x0c(%rsi), %xmm1
1154 movaps 0x04(%rsi), %xmm2
1155 movaps 0x14(%rsi), %xmm3
1156 movaps 0x24(%rsi), %xmm4
1157 movaps 0x34(%rsi), %xmm5
1158 movaps 0x44(%rsi), %xmm6
1159 movaps 0x54(%rsi), %xmm7
1160 movaps 0x64(%rsi), %xmm8
1161 movaps 0x74(%rsi), %xmm9
1162 lea 0x80(%rsi), %rsi
1163 palignr $12, %xmm8, %xmm9
1164 movaps %xmm9, 0x70(%rdi)
1165 palignr $12, %xmm7, %xmm8
1166 movaps %xmm8, 0x60(%rdi)
1167 palignr $12, %xmm6, %xmm7
1168 movaps %xmm7, 0x50(%rdi)
1169 palignr $12, %xmm5, %xmm6
1170 movaps %xmm6, 0x40(%rdi)
1171 palignr $12, %xmm4, %xmm5
1172 movaps %xmm5, 0x30(%rdi)
1173 palignr $12, %xmm3, %xmm4
1174 movaps %xmm4, 0x20(%rdi)
1175 palignr $12, %xmm2, %xmm3
1176 movaps %xmm3, 0x10(%rdi)
1177 palignr $12, %xmm1, %xmm2
1178 movaps %xmm2, (%rdi)
1179
1180 lea 0x80(%rdi), %rdi
1181 jae L(shl_12)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
1182 movdqu %xmm0, (%r8)
1183 add $0x80, %rdx
1184 add %rdx, %rdi
1185 add %rdx, %rsi
1186 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1187
1188 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 12 bytes
   past a 16-byte boundary: the movaps loads at -0x0c(%rsi) etc. fault
   unless (%rsi & 15) == 12.  Each palignr $12 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
1189 L(shl_12_bwd):
1190 movaps -0x0c(%rsi), %xmm1
1191
1192 movaps -0x1c(%rsi), %xmm2
1193 palignr $12, %xmm2, %xmm1
1194 movaps %xmm1, -0x10(%rdi)
1195
1196 movaps -0x2c(%rsi), %xmm3
1197 palignr $12, %xmm3, %xmm2
1198 movaps %xmm2, -0x20(%rdi)
1199
1200 movaps -0x3c(%rsi), %xmm4
1201 palignr $12, %xmm4, %xmm3
1202 movaps %xmm3, -0x30(%rdi)
1203
1204 movaps -0x4c(%rsi), %xmm5
1205 palignr $12, %xmm5, %xmm4
1206 movaps %xmm4, -0x40(%rdi)
1207
1208 movaps -0x5c(%rsi), %xmm6
1209 palignr $12, %xmm6, %xmm5
1210 movaps %xmm5, -0x50(%rdi)
1211
1212 movaps -0x6c(%rsi), %xmm7
1213 palignr $12, %xmm7, %xmm6
1214 movaps %xmm6, -0x60(%rdi)
1215
1216 movaps -0x7c(%rsi), %xmm8
1217 palignr $12, %xmm8, %xmm7
1218 movaps %xmm7, -0x70(%rdi)
1219
1220 movaps -0x8c(%rsi), %xmm9
1221 palignr $12, %xmm9, %xmm8
1222 movaps %xmm8, -0x80(%rdi)
1223
1224 sub $0x80, %rdx
1225 lea -0x80(%rdi), %rdi
1226 lea -0x80(%rsi), %rsi
1227 jae L(shl_12_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
1228 movdqu %xmm0, (%r8)
1229 add $0x80, %rdx
1230 sub %rdx, %rdi
1231 sub %rdx, %rsi
1232 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1233
1234 .p2align 4
/* Forward copy loop, 128 bytes per pass, for a source 13 bytes past a
   16-byte boundary: the movaps loads at -0x0d(%rsi) etc. fault unless
   (%rsi & 15) == 13.  Nine aligned loads are merged pairwise by
   palignr $13 into eight aligned stores at (%rdi).  movaps, lea and
   palignr do not write flags, so jae still sees CF from the sub.  */
1235 L(shl_13):
1236 sub $0x80, %rdx
1237 movaps -0x0d(%rsi), %xmm1
1238 movaps 0x03(%rsi), %xmm2
1239 movaps 0x13(%rsi), %xmm3
1240 movaps 0x23(%rsi), %xmm4
1241 movaps 0x33(%rsi), %xmm5
1242 movaps 0x43(%rsi), %xmm6
1243 movaps 0x53(%rsi), %xmm7
1244 movaps 0x63(%rsi), %xmm8
1245 movaps 0x73(%rsi), %xmm9
1246 lea 0x80(%rsi), %rsi
1247 palignr $13, %xmm8, %xmm9
1248 movaps %xmm9, 0x70(%rdi)
1249 palignr $13, %xmm7, %xmm8
1250 movaps %xmm8, 0x60(%rdi)
1251 palignr $13, %xmm6, %xmm7
1252 movaps %xmm7, 0x50(%rdi)
1253 palignr $13, %xmm5, %xmm6
1254 movaps %xmm6, 0x40(%rdi)
1255 palignr $13, %xmm4, %xmm5
1256 movaps %xmm5, 0x30(%rdi)
1257 palignr $13, %xmm3, %xmm4
1258 movaps %xmm4, 0x20(%rdi)
1259 palignr $13, %xmm2, %xmm3
1260 movaps %xmm3, 0x10(%rdi)
1261 palignr $13, %xmm1, %xmm2
1262 movaps %xmm2, (%rdi)
1263 lea 0x80(%rdi), %rdi
1264 jae L(shl_13)
/* Exit: store the head bytes saved in %xmm0 to (%r8), undo the last
   subtraction, advance both pointers to the end of the data and
   dispatch on the leftover count.  */
1265 movdqu %xmm0, (%r8)
1266 add $0x80, %rdx
1267 add %rdx, %rdi
1268 add %rdx, %rsi
1269 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1270
1271 .p2align 4
/* Backward copy loop, 128 bytes per pass, for a source end 13 bytes
   past a 16-byte boundary: the movaps loads at -0x0d(%rsi) etc. fault
   unless (%rsi & 15) == 13.  Each palignr $13 joins two neighbouring
   aligned loads into one aligned 16-byte store below %rdi.  lea does
   not write flags, so jae still sees CF from the sub.  */
1272 L(shl_13_bwd):
1273 movaps -0x0d(%rsi), %xmm1
1274
1275 movaps -0x1d(%rsi), %xmm2
1276 palignr $13, %xmm2, %xmm1
1277 movaps %xmm1, -0x10(%rdi)
1278
1279 movaps -0x2d(%rsi), %xmm3
1280 palignr $13, %xmm3, %xmm2
1281 movaps %xmm2, -0x20(%rdi)
1282
1283 movaps -0x3d(%rsi), %xmm4
1284 palignr $13, %xmm4, %xmm3
1285 movaps %xmm3, -0x30(%rdi)
1286
1287 movaps -0x4d(%rsi), %xmm5
1288 palignr $13, %xmm5, %xmm4
1289 movaps %xmm4, -0x40(%rdi)
1290
1291 movaps -0x5d(%rsi), %xmm6
1292 palignr $13, %xmm6, %xmm5
1293 movaps %xmm5, -0x50(%rdi)
1294
1295 movaps -0x6d(%rsi), %xmm7
1296 palignr $13, %xmm7, %xmm6
1297 movaps %xmm6, -0x60(%rdi)
1298
1299 movaps -0x7d(%rsi), %xmm8
1300 palignr $13, %xmm8, %xmm7
1301 movaps %xmm7, -0x70(%rdi)
1302
1303 movaps -0x8d(%rsi), %xmm9
1304 palignr $13, %xmm9, %xmm8
1305 movaps %xmm8, -0x80(%rdi)
1306
1307 sub $0x80, %rdx
1308 lea -0x80(%rdi), %rdi
1309 lea -0x80(%rsi), %rsi
1310 jae L(shl_13_bwd)
/* Exit: store the tail bytes saved in %xmm0 to (%r8), undo the last
   subtraction, move both pointers down to the start of the leftover
   bytes and dispatch on their count.  */
1311 movdqu %xmm0, (%r8)
1312 add $0x80, %rdx
1313 sub %rdx, %rdi
1314 sub %rdx, %rsi
1315 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1316
1317 .p2align 4
/* Forward copy loop, 0x80 bytes per pass, for a source that sits 14
   bytes past a 16-byte boundary.  Nine vectors are read with movaps
   from -0x0e(%rsi) .. 0x72(%rsi), neighbouring pairs are merged with
   palignr $14, and eight vectors are written with movaps to
   (%rdi) .. 0x70(%rdi).  movaps faults on unaligned addresses, so this
   path relies on %rsi - 14 and %rdi being 16-byte aligned on entry
   (established outside this view).  Uses %xmm1-%xmm9.  */
1318 L(shl_14):
/* The borrow from this sub is what the jae at the bottom tests; the
   movaps/lea/palignr instructions in between leave the flags alone.  */
1319 sub $0x80, %rdx
1320 movaps -0x0e(%rsi), %xmm1
1321 movaps 0x02(%rsi), %xmm2
1322 movaps 0x12(%rsi), %xmm3
1323 movaps 0x22(%rsi), %xmm4
1324 movaps 0x32(%rsi), %xmm5
1325 movaps 0x42(%rsi), %xmm6
1326 movaps 0x52(%rsi), %xmm7
1327 movaps 0x62(%rsi), %xmm8
1328 movaps 0x72(%rsi), %xmm9
1329 lea 0x80(%rsi), %rsi
/* Merge from the highest pair downwards so each register is consumed
   as the low half of a pair before it is overwritten as a result.  */
1330 palignr $14, %xmm8, %xmm9
1331 movaps %xmm9, 0x70(%rdi)
1332 palignr $14, %xmm7, %xmm8
1333 movaps %xmm8, 0x60(%rdi)
1334 palignr $14, %xmm6, %xmm7
1335 movaps %xmm7, 0x50(%rdi)
1336 palignr $14, %xmm5, %xmm6
1337 movaps %xmm6, 0x40(%rdi)
1338 palignr $14, %xmm4, %xmm5
1339 movaps %xmm5, 0x30(%rdi)
1340 palignr $14, %xmm3, %xmm4
1341 movaps %xmm4, 0x20(%rdi)
1342 palignr $14, %xmm2, %xmm3
1343 movaps %xmm3, 0x10(%rdi)
1344 palignr $14, %xmm1, %xmm2
1345 movaps %xmm2, (%rdi)
1346 lea 0x80(%rdi), %rdi
1347 jae L(shl_14)
/* %xmm0 and %r8 are set up before this label (not visible here);
   presumably the saved unaligned head of the copy -- TODO confirm.  */
1348 movdqu %xmm0, (%r8)
/* Undo the last over-subtraction so %rdx is the leftover byte count,
   step both pointers past the leftover bytes and dispatch on that
   count through the forward table of 4-byte relative entries.  */
1349 add $0x80, %rdx
1350 add %rdx, %rdi
1351 add %rdx, %rsi
1352 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1353
1354 .p2align 4
/* Backward (descending address) counterpart of the palignr $14 loop,
   0x80 bytes per pass.  Nine vectors are read with movaps from
   -0x0e(%rsi) down to -0x8e(%rsi); each neighbouring pair is merged
   with palignr $14 and written with movaps to -0x10(%rdi) down to
   -0x80(%rdi).  movaps faults on unaligned addresses, so %rsi - 14 and
   %rdi must be 16-byte aligned on entry (established outside this
   view).  Uses %xmm1-%xmm9.  */
1355 L(shl_14_bwd):
1356 movaps -0x0e(%rsi), %xmm1
1357
1358 movaps -0x1e(%rsi), %xmm2
1359 palignr $14, %xmm2, %xmm1
1360 movaps %xmm1, -0x10(%rdi)
1361
1362 movaps -0x2e(%rsi), %xmm3
1363 palignr $14, %xmm3, %xmm2
1364 movaps %xmm2, -0x20(%rdi)
1365
1366 movaps -0x3e(%rsi), %xmm4
1367 palignr $14, %xmm4, %xmm3
1368 movaps %xmm3, -0x30(%rdi)
1369
1370 movaps -0x4e(%rsi), %xmm5
1371 palignr $14, %xmm5, %xmm4
1372 movaps %xmm4, -0x40(%rdi)
1373
1374 movaps -0x5e(%rsi), %xmm6
1375 palignr $14, %xmm6, %xmm5
1376 movaps %xmm5, -0x50(%rdi)
1377
1378 movaps -0x6e(%rsi), %xmm7
1379 palignr $14, %xmm7, %xmm6
1380 movaps %xmm6, -0x60(%rdi)
1381
1382 movaps -0x7e(%rsi), %xmm8
1383 palignr $14, %xmm8, %xmm7
1384 movaps %xmm7, -0x70(%rdi)
1385
1386 movaps -0x8e(%rsi), %xmm9
1387 palignr $14, %xmm9, %xmm8
1388 movaps %xmm8, -0x80(%rdi)
1389
/* The two lea instructions do not touch the flags, so the jae still
   sees the borrow produced by this sub.  */
1390 sub $0x80, %rdx
1391 lea -0x80(%rdi), %rdi
1392 lea -0x80(%rsi), %rsi
1393 jae L(shl_14_bwd)
/* %xmm0 and %r8 are set up before this label (not visible here);
   presumably the saved unaligned end of the copy -- TODO confirm.  */
1394 movdqu %xmm0, (%r8)
/* Restore %rdx to the leftover byte count, move both pointers down to
   the start of the leftover bytes and dispatch on the count through
   the backward table of 4-byte relative entries.  */
1395 add $0x80, %rdx
1396 sub %rdx, %rdi
1397 sub %rdx, %rsi
1398 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1399
1400 .p2align 4
/* Forward copy loop, 0x80 bytes per pass, for a source that sits 15
   bytes past a 16-byte boundary.  Nine vectors are read with movaps
   from -0x0f(%rsi) .. 0x71(%rsi), neighbouring pairs are merged with
   palignr $15, and eight vectors are written with movaps to
   (%rdi) .. 0x70(%rdi).  movaps faults on unaligned addresses, so this
   path relies on %rsi - 15 and %rdi being 16-byte aligned on entry
   (established outside this view).  Uses %xmm1-%xmm9.  */
1401 L(shl_15):
/* The borrow from this sub is what the jae at the bottom tests; the
   movaps/lea/palignr instructions in between leave the flags alone.  */
1402 sub $0x80, %rdx
1403 movaps -0x0f(%rsi), %xmm1
1404 movaps 0x01(%rsi), %xmm2
1405 movaps 0x11(%rsi), %xmm3
1406 movaps 0x21(%rsi), %xmm4
1407 movaps 0x31(%rsi), %xmm5
1408 movaps 0x41(%rsi), %xmm6
1409 movaps 0x51(%rsi), %xmm7
1410 movaps 0x61(%rsi), %xmm8
1411 movaps 0x71(%rsi), %xmm9
1412 lea 0x80(%rsi), %rsi
/* Merge from the highest pair downwards so each register is consumed
   as the low half of a pair before it is overwritten as a result.  */
1413 palignr $15, %xmm8, %xmm9
1414 movaps %xmm9, 0x70(%rdi)
1415 palignr $15, %xmm7, %xmm8
1416 movaps %xmm8, 0x60(%rdi)
1417 palignr $15, %xmm6, %xmm7
1418 movaps %xmm7, 0x50(%rdi)
1419 palignr $15, %xmm5, %xmm6
1420 movaps %xmm6, 0x40(%rdi)
1421 palignr $15, %xmm4, %xmm5
1422 movaps %xmm5, 0x30(%rdi)
1423 palignr $15, %xmm3, %xmm4
1424 movaps %xmm4, 0x20(%rdi)
1425 palignr $15, %xmm2, %xmm3
1426 movaps %xmm3, 0x10(%rdi)
1427 palignr $15, %xmm1, %xmm2
1428 movaps %xmm2, (%rdi)
1429 lea 0x80(%rdi), %rdi
1430 jae L(shl_15)
/* %xmm0 and %r8 are set up before this label (not visible here);
   presumably the saved unaligned head of the copy -- TODO confirm.  */
1431 movdqu %xmm0, (%r8)
/* Undo the last over-subtraction so %rdx is the leftover byte count,
   step both pointers past the leftover bytes and dispatch on that
   count through the forward table of 4-byte relative entries.  */
1432 add $0x80, %rdx
1433 add %rdx, %rdi
1434 add %rdx, %rsi
1435 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1436
1437 .p2align 4
/* Backward (descending address) counterpart of the palignr $15 loop,
   0x80 bytes per pass.  Nine vectors are read with movaps from
   -0x0f(%rsi) down to -0x8f(%rsi); each neighbouring pair is merged
   with palignr $15 and written with movaps to -0x10(%rdi) down to
   -0x80(%rdi).  movaps faults on unaligned addresses, so %rsi - 15 and
   %rdi must be 16-byte aligned on entry (established outside this
   view).  Uses %xmm1-%xmm9.  */
1438 L(shl_15_bwd):
1439 movaps -0x0f(%rsi), %xmm1
1440
1441 movaps -0x1f(%rsi), %xmm2
1442 palignr $15, %xmm2, %xmm1
1443 movaps %xmm1, -0x10(%rdi)
1444
1445 movaps -0x2f(%rsi), %xmm3
1446 palignr $15, %xmm3, %xmm2
1447 movaps %xmm2, -0x20(%rdi)
1448
1449 movaps -0x3f(%rsi), %xmm4
1450 palignr $15, %xmm4, %xmm3
1451 movaps %xmm3, -0x30(%rdi)
1452
1453 movaps -0x4f(%rsi), %xmm5
1454 palignr $15, %xmm5, %xmm4
1455 movaps %xmm4, -0x40(%rdi)
1456
1457 movaps -0x5f(%rsi), %xmm6
1458 palignr $15, %xmm6, %xmm5
1459 movaps %xmm5, -0x50(%rdi)
1460
1461 movaps -0x6f(%rsi), %xmm7
1462 palignr $15, %xmm7, %xmm6
1463 movaps %xmm6, -0x60(%rdi)
1464
1465 movaps -0x7f(%rsi), %xmm8
1466 palignr $15, %xmm8, %xmm7
1467 movaps %xmm7, -0x70(%rdi)
1468
1469 movaps -0x8f(%rsi), %xmm9
1470 palignr $15, %xmm9, %xmm8
1471 movaps %xmm8, -0x80(%rdi)
1472
/* The two lea instructions do not touch the flags, so the jae still
   sees the borrow produced by this sub.  */
1473 sub $0x80, %rdx
1474 lea -0x80(%rdi), %rdi
1475 lea -0x80(%rsi), %rsi
1476 jae L(shl_15_bwd)
/* %xmm0 and %r8 are set up before this label (not visible here);
   presumably the saved unaligned end of the copy -- TODO confirm.  */
1477 movdqu %xmm0, (%r8)
/* Restore %rdx to the leftover byte count, move both pointers down to
   the start of the leftover bytes and dispatch on the count through
   the backward table of 4-byte relative entries.  */
1478 add $0x80, %rdx
1479 sub %rdx, %rdi
1480 sub %rdx, %rsi
1481 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1482
1483 .p2align 4
/* Forward copy with unaligned loads, split between a non-temporal
   store loop (movntdq) and an ordinary aligned store loop (movdqa),
   both moving 0x80 bytes per pass.  How the length is divided between
   the two loops depends on half the shared cache size.  On entry %rsi
   is the source, %rdi the destination (the movdqa stores require it to
   be 16-byte aligned), %rdx the byte count; %xmm0 and %r8 are set up
   before this label (not visible here).  Leftover bytes are finished
   through table_144_bytes_fwd.  */
1484 L(gobble_mem_fwd):
/* Copy the first 16 bytes and write the vector saved in %xmm0 to (%r8)
   right away, because the loops below reuse %xmm0.  */
1485 movdqu (%rsi), %xmm1
1486 movdqu %xmm0, (%r8)
1487 movdqa %xmm1, (%rdi)
1488 sub $16, %rdx
1489 add $16, %rsi
1490 add $16, %rdi
1491
/* %rcx = half of the shared cache size, either a build-time constant
   or the __x86_shared_cache_size_half variable.  */
1492 #ifdef SHARED_CACHE_SIZE_HALF
1493 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1494 #else
1495 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1496 #endif
/* memmove only: %r9 = %rsi - %rdi.  When that distance is at least the
   remaining length, carry on as for memcpy.  Otherwise, when it is no
   larger than %rcx, copy the whole length with the movdqa loop.  */
1497 #ifdef USE_AS_MEMMOVE
1498 mov %rsi, %r9
1499 sub %rdi, %r9
1500 cmp %rdx, %r9
1501 jae L(memmove_is_memcpy_fwd)
1502 cmp %rcx, %r9
1503 jbe L(ll_cache_copy_fwd_start)
1504 L(memmove_is_memcpy_fwd):
1505 #endif
/* %rcx = min (%rcx, %rdx) and %rdx = the bytes beyond that.  With at
   most 0x1000 bytes beyond, everything goes through the movdqa loop.  */
1506 cmp %rcx, %rdx
1507 ja L(bigger_in_fwd)
1508 mov %rdx, %rcx
1509 L(bigger_in_fwd):
1510 sub %rcx, %rdx
1511 cmp $0x1000, %rdx
1512 jbe L(ll_cache_copy_fwd)
1513
/* When the excess is more than 8 * %rcx, fold %rcx back into %rdx and
   zero %rcx so the whole length goes through the non-temporal loop.  */
1514 mov %rcx, %r9
1515 shl $3, %r9
1516 cmp %r9, %rdx
1517 jbe L(2steps_copy_fwd)
1518 add %rcx, %rdx
1519 xor %rcx, %rcx
1520 L(2steps_copy_fwd):
1521 sub $0x80, %rdx
/* Non-temporal loop.  It repeats while the sub at its top does not
   borrow; the prefetch, movdqu, lfence, movntdq and lea instructions
   that follow leave the flags untouched.  */
1522 L(gobble_mem_fwd_loop):
1523 sub $0x80, %rdx
1524 prefetcht0 0x200(%rsi)
1525 prefetcht0 0x300(%rsi)
1526 movdqu (%rsi), %xmm0
1527 movdqu 0x10(%rsi), %xmm1
1528 movdqu 0x20(%rsi), %xmm2
1529 movdqu 0x30(%rsi), %xmm3
1530 movdqu 0x40(%rsi), %xmm4
1531 movdqu 0x50(%rsi), %xmm5
1532 movdqu 0x60(%rsi), %xmm6
1533 movdqu 0x70(%rsi), %xmm7
/* All eight loads are issued before the first store of this pass.  */
1534 lfence
1535 movntdq %xmm0, (%rdi)
1536 movntdq %xmm1, 0x10(%rdi)
1537 movntdq %xmm2, 0x20(%rdi)
1538 movntdq %xmm3, 0x30(%rdi)
1539 movntdq %xmm4, 0x40(%rdi)
1540 movntdq %xmm5, 0x50(%rdi)
1541 movntdq %xmm6, 0x60(%rdi)
1542 movntdq %xmm7, 0x70(%rdi)
1543 lea 0x80(%rsi), %rsi
1544 lea 0x80(%rdi), %rdi
1545 jae L(gobble_mem_fwd_loop)
/* Fence the non-temporal stores.  With fewer than 0x80 bytes held back
   in %rcx skip the second loop; otherwise restore the 0x80 taken by
   the last sub and add the held-back bytes to the count.  */
1546 sfence
1547 cmp $0x80, %rcx
1548 jb L(gobble_mem_fwd_end)
1549 add $0x80, %rdx
1550 L(ll_cache_copy_fwd):
1551 add %rcx, %rdx
1552 L(ll_cache_copy_fwd_start):
1553 sub $0x80, %rdx
/* Ordinary-store loop: prefetchnta on both source and destination,
   unaligned loads, aligned movdqa stores.  */
1554 L(gobble_ll_loop_fwd):
1555 prefetchnta 0x1c0(%rsi)
1556 prefetchnta 0x280(%rsi)
1557 prefetchnta 0x1c0(%rdi)
1558 prefetchnta 0x280(%rdi)
1559 sub $0x80, %rdx
1560 movdqu (%rsi), %xmm0
1561 movdqu 0x10(%rsi), %xmm1
1562 movdqu 0x20(%rsi), %xmm2
1563 movdqu 0x30(%rsi), %xmm3
1564 movdqu 0x40(%rsi), %xmm4
1565 movdqu 0x50(%rsi), %xmm5
1566 movdqu 0x60(%rsi), %xmm6
1567 movdqu 0x70(%rsi), %xmm7
1568 movdqa %xmm0, (%rdi)
1569 movdqa %xmm1, 0x10(%rdi)
1570 movdqa %xmm2, 0x20(%rdi)
1571 movdqa %xmm3, 0x30(%rdi)
1572 movdqa %xmm4, 0x40(%rdi)
1573 movdqa %xmm5, 0x50(%rdi)
1574 movdqa %xmm6, 0x60(%rdi)
1575 movdqa %xmm7, 0x70(%rdi)
1576 lea 0x80(%rsi), %rsi
1577 lea 0x80(%rdi), %rdi
1578 jae L(gobble_ll_loop_fwd)
/* Restore the 0x80 taken by the last sub, step both pointers past the
   leftover bytes and dispatch on %rdx through the forward table of
   4-byte relative entries.  */
1579 L(gobble_mem_fwd_end):
1580 add $0x80, %rdx
1581 add %rdx, %rsi
1582 add %rdx, %rdi
1583 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1584
1585 .p2align 4
/* Backward (descending address) copy with unaligned loads, split
   between a non-temporal store loop (movntdq) and an ordinary aligned
   store loop (movdqa), both moving 0x80 bytes per pass.  How the
   length is divided between the two loops depends on half the shared
   cache size.  On entry %rsi is the source, %rdi the destination and
   %rdx the byte count.  Leftover bytes are finished through
   table_144_bytes_bwd.  */
1586 L(gobble_mem_bwd):
/* Point both registers just past the end of their buffers.  */
1587 add %rdx, %rsi
1588 add %rdx, %rdi
1589
/* Keep the last 16 source bytes in %xmm0 and their destination address
   in %r8; they are written at gobble_mem_bwd_end.  Then round %rdi
   down to a 16-byte boundary and move %rsi and %rdx back by the same
   0..15 bytes.  */
1590 movdqu -16(%rsi), %xmm0
1591 lea -16(%rdi), %r8
1592 mov %rdi, %r9
1593 and $-16, %rdi
1594 sub %rdi, %r9
1595 sub %r9, %rsi
1596 sub %r9, %rdx
1597
1598
/* %rcx = half of the shared cache size, either a build-time constant
   or the __x86_shared_cache_size_half variable.  */
1599 #ifdef SHARED_CACHE_SIZE_HALF
1600 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1601 #else
1602 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1603 #endif
/* memmove only: %r9 = %rdi - %rsi.  When that distance is at least the
   remaining length, carry on as for memcpy.  Otherwise, when it is no
   larger than %rcx, copy the whole length with the movdqa loop.  */
1604 #ifdef USE_AS_MEMMOVE
1605 mov %rdi, %r9
1606 sub %rsi, %r9
1607 cmp %rdx, %r9
1608 jae L(memmove_is_memcpy_bwd)
1609 cmp %rcx, %r9
1610 jbe L(ll_cache_copy_bwd_start)
1611 L(memmove_is_memcpy_bwd):
1612 #endif
/* %rcx = min (%rcx, %rdx) and %rdx = the bytes beyond that.  With at
   most 0x1000 bytes beyond, everything goes through the movdqa loop.  */
1613 cmp %rcx, %rdx
1614 ja L(bigger)
1615 mov %rdx, %rcx
1616 L(bigger):
1617 sub %rcx, %rdx
1618 cmp $0x1000, %rdx
1619 jbe L(ll_cache_copy)
1620
/* When the excess is more than 8 * %rcx, fold %rcx back into %rdx and
   zero %rcx so the whole length goes through the non-temporal loop.  */
1621 mov %rcx, %r9
1622 shl $3, %r9
1623 cmp %r9, %rdx
1624 jbe L(2steps_copy)
1625 add %rcx, %rdx
1626 xor %rcx, %rcx
1627 L(2steps_copy):
1628 sub $0x80, %rdx
/* Non-temporal loop.  It repeats while the sub at its top does not
   borrow; the prefetch, movdqu, lfence, movntdq and lea instructions
   that follow leave the flags untouched.  Only %xmm1-%xmm8 are used,
   so the vector saved in %xmm0 survives.  */
1629 L(gobble_mem_bwd_loop):
1630 sub $0x80, %rdx
1631 prefetcht0 -0x200(%rsi)
1632 prefetcht0 -0x300(%rsi)
1633 movdqu -0x10(%rsi), %xmm1
1634 movdqu -0x20(%rsi), %xmm2
1635 movdqu -0x30(%rsi), %xmm3
1636 movdqu -0x40(%rsi), %xmm4
1637 movdqu -0x50(%rsi), %xmm5
1638 movdqu -0x60(%rsi), %xmm6
1639 movdqu -0x70(%rsi), %xmm7
1640 movdqu -0x80(%rsi), %xmm8
/* All eight loads are issued before the first store of this pass.  */
1641 lfence
1642 movntdq %xmm1, -0x10(%rdi)
1643 movntdq %xmm2, -0x20(%rdi)
1644 movntdq %xmm3, -0x30(%rdi)
1645 movntdq %xmm4, -0x40(%rdi)
1646 movntdq %xmm5, -0x50(%rdi)
1647 movntdq %xmm6, -0x60(%rdi)
1648 movntdq %xmm7, -0x70(%rdi)
1649 movntdq %xmm8, -0x80(%rdi)
1650 lea -0x80(%rsi), %rsi
1651 lea -0x80(%rdi), %rdi
1652 jae L(gobble_mem_bwd_loop)
/* Fence the non-temporal stores.  With fewer than 0x80 bytes held back
   in %rcx skip the second loop; otherwise restore the 0x80 taken by
   the last sub and add the held-back bytes to the count.  */
1653 sfence
1654 cmp $0x80, %rcx
1655 jb L(gobble_mem_bwd_end)
1656 add $0x80, %rdx
1657 L(ll_cache_copy):
1658 add %rcx, %rdx
1659 L(ll_cache_copy_bwd_start):
1660 sub $0x80, %rdx
/* Ordinary-store loop: prefetchnta on both source and destination,
   unaligned loads, aligned movdqa stores.  */
1661 L(gobble_ll_loop):
1662 prefetchnta -0x1c0(%rsi)
1663 prefetchnta -0x280(%rsi)
1664 prefetchnta -0x1c0(%rdi)
1665 prefetchnta -0x280(%rdi)
1666 sub $0x80, %rdx
1667 movdqu -0x10(%rsi), %xmm1
1668 movdqu -0x20(%rsi), %xmm2
1669 movdqu -0x30(%rsi), %xmm3
1670 movdqu -0x40(%rsi), %xmm4
1671 movdqu -0x50(%rsi), %xmm5
1672 movdqu -0x60(%rsi), %xmm6
1673 movdqu -0x70(%rsi), %xmm7
1674 movdqu -0x80(%rsi), %xmm8
1675 movdqa %xmm1, -0x10(%rdi)
1676 movdqa %xmm2, -0x20(%rdi)
1677 movdqa %xmm3, -0x30(%rdi)
1678 movdqa %xmm4, -0x40(%rdi)
1679 movdqa %xmm5, -0x50(%rdi)
1680 movdqa %xmm6, -0x60(%rdi)
1681 movdqa %xmm7, -0x70(%rdi)
1682 movdqa %xmm8, -0x80(%rdi)
1683 lea -0x80(%rsi), %rsi
1684 lea -0x80(%rdi), %rdi
1685 jae L(gobble_ll_loop)
/* Write the saved last 16 bytes, restore the 0x80 taken by the last
   sub, move both pointers down to the start of the leftover bytes and
   dispatch on %rdx through the backward table of 4-byte relative
   entries.  */
1686 L(gobble_mem_bwd_end):
1687 movdqu %xmm0, (%r8)
1688 add $0x80, %rdx
1689 sub %rdx, %rsi
1690 sub %rdx, %rdi
1691 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1692
1693 .p2align 4
/* Forward tail copy for lengths that are a multiple of 16 (0..128).
   %rsi and %rdi point just past the end of the bytes still to copy,
   so every offset is negative.  Entering at fwd_write_<N>bytes copies
   [%rsi - N, %rsi) to [%rdi - N, %rdi) with one unaligned 16-byte
   load/store per label, falling through the smaller labels to the ret
   at fwd_write_0bytes.  Clobbers %xmm0.  */
1694 L(fwd_write_128bytes):
1695 lddqu -128(%rsi), %xmm0
1696 movdqu %xmm0, -128(%rdi)
1697 L(fwd_write_112bytes):
1698 lddqu -112(%rsi), %xmm0
1699 movdqu %xmm0, -112(%rdi)
1700 L(fwd_write_96bytes):
1701 lddqu -96(%rsi), %xmm0
1702 movdqu %xmm0, -96(%rdi)
1703 L(fwd_write_80bytes):
1704 lddqu -80(%rsi), %xmm0
1705 movdqu %xmm0, -80(%rdi)
1706 L(fwd_write_64bytes):
1707 lddqu -64(%rsi), %xmm0
1708 movdqu %xmm0, -64(%rdi)
1709 L(fwd_write_48bytes):
1710 lddqu -48(%rsi), %xmm0
1711 movdqu %xmm0, -48(%rdi)
1712 L(fwd_write_32bytes):
1713 lddqu -32(%rsi), %xmm0
1714 movdqu %xmm0, -32(%rdi)
1715 L(fwd_write_16bytes):
1716 lddqu -16(%rsi), %xmm0
1717 movdqu %xmm0, -16(%rdi)
1718 L(fwd_write_0bytes):
1719 ret
1720
1721
1722 .p2align 4
/* Forward tail copy for lengths N = 16*k + 15, 31 <= N <= 143.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_31bytes.  The last two chunks overlap by 1 byte and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1723 L(fwd_write_143bytes):
1724 lddqu -143(%rsi), %xmm0
1725 movdqu %xmm0, -143(%rdi)
1726 L(fwd_write_127bytes):
1727 lddqu -127(%rsi), %xmm0
1728 movdqu %xmm0, -127(%rdi)
1729 L(fwd_write_111bytes):
1730 lddqu -111(%rsi), %xmm0
1731 movdqu %xmm0, -111(%rdi)
1732 L(fwd_write_95bytes):
1733 lddqu -95(%rsi), %xmm0
1734 movdqu %xmm0, -95(%rdi)
1735 L(fwd_write_79bytes):
1736 lddqu -79(%rsi), %xmm0
1737 movdqu %xmm0, -79(%rdi)
1738 L(fwd_write_63bytes):
1739 lddqu -63(%rsi), %xmm0
1740 movdqu %xmm0, -63(%rdi)
1741 L(fwd_write_47bytes):
1742 lddqu -47(%rsi), %xmm0
1743 movdqu %xmm0, -47(%rdi)
1744 L(fwd_write_31bytes):
1745 lddqu -31(%rsi), %xmm0
1746 lddqu -16(%rsi), %xmm1
1747 movdqu %xmm0, -31(%rdi)
1748 movdqu %xmm1, -16(%rdi)
1749 ret
1750
1751 .p2align 4
/* Copy the 15 bytes ending at %rsi to the 15 bytes ending at %rdi with
   two 8-byte moves that overlap by 1 byte.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
1752 L(fwd_write_15bytes):
1753 mov -15(%rsi), %rdx
1754 mov -8(%rsi), %rcx
1755 mov %rdx, -15(%rdi)
1756 mov %rcx, -8(%rdi)
1757 ret
1758
1759 .p2align 4
/* Forward tail copy for lengths N = 16*k + 14, 30 <= N <= 142.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_30bytes.  The last two chunks overlap by 2 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1760 L(fwd_write_142bytes):
1761 lddqu -142(%rsi), %xmm0
1762 movdqu %xmm0, -142(%rdi)
1763 L(fwd_write_126bytes):
1764 lddqu -126(%rsi), %xmm0
1765 movdqu %xmm0, -126(%rdi)
1766 L(fwd_write_110bytes):
1767 lddqu -110(%rsi), %xmm0
1768 movdqu %xmm0, -110(%rdi)
1769 L(fwd_write_94bytes):
1770 lddqu -94(%rsi), %xmm0
1771 movdqu %xmm0, -94(%rdi)
1772 L(fwd_write_78bytes):
1773 lddqu -78(%rsi), %xmm0
1774 movdqu %xmm0, -78(%rdi)
1775 L(fwd_write_62bytes):
1776 lddqu -62(%rsi), %xmm0
1777 movdqu %xmm0, -62(%rdi)
1778 L(fwd_write_46bytes):
1779 lddqu -46(%rsi), %xmm0
1780 movdqu %xmm0, -46(%rdi)
1781 L(fwd_write_30bytes):
1782 lddqu -30(%rsi), %xmm0
1783 lddqu -16(%rsi), %xmm1
1784 movdqu %xmm0, -30(%rdi)
1785 movdqu %xmm1, -16(%rdi)
1786 ret
1787
1788 .p2align 4
/* Copy the 14 bytes ending at %rsi to the 14 bytes ending at %rdi with
   two 8-byte moves that overlap by 2 bytes.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
1789 L(fwd_write_14bytes):
1790 mov -14(%rsi), %rdx
1791 mov -8(%rsi), %rcx
1792 mov %rdx, -14(%rdi)
1793 mov %rcx, -8(%rdi)
1794 ret
1795
1796 .p2align 4
/* Forward tail copy for lengths N = 16*k + 13, 29 <= N <= 141.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_29bytes.  The last two chunks overlap by 3 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1797 L(fwd_write_141bytes):
1798 lddqu -141(%rsi), %xmm0
1799 movdqu %xmm0, -141(%rdi)
1800 L(fwd_write_125bytes):
1801 lddqu -125(%rsi), %xmm0
1802 movdqu %xmm0, -125(%rdi)
1803 L(fwd_write_109bytes):
1804 lddqu -109(%rsi), %xmm0
1805 movdqu %xmm0, -109(%rdi)
1806 L(fwd_write_93bytes):
1807 lddqu -93(%rsi), %xmm0
1808 movdqu %xmm0, -93(%rdi)
1809 L(fwd_write_77bytes):
1810 lddqu -77(%rsi), %xmm0
1811 movdqu %xmm0, -77(%rdi)
1812 L(fwd_write_61bytes):
1813 lddqu -61(%rsi), %xmm0
1814 movdqu %xmm0, -61(%rdi)
1815 L(fwd_write_45bytes):
1816 lddqu -45(%rsi), %xmm0
1817 movdqu %xmm0, -45(%rdi)
1818 L(fwd_write_29bytes):
1819 lddqu -29(%rsi), %xmm0
1820 lddqu -16(%rsi), %xmm1
1821 movdqu %xmm0, -29(%rdi)
1822 movdqu %xmm1, -16(%rdi)
1823 ret
1824
1825 .p2align 4
/* Copy the 13 bytes ending at %rsi to the 13 bytes ending at %rdi with
   two 8-byte moves that overlap by 3 bytes.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
1826 L(fwd_write_13bytes):
1827 mov -13(%rsi), %rdx
1828 mov -8(%rsi), %rcx
1829 mov %rdx, -13(%rdi)
1830 mov %rcx, -8(%rdi)
1831 ret
1832
1833 .p2align 4
/* Forward tail copy for lengths N = 16*k + 12, 28 <= N <= 140.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_28bytes.  The last two chunks overlap by 4 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1834 L(fwd_write_140bytes):
1835 lddqu -140(%rsi), %xmm0
1836 movdqu %xmm0, -140(%rdi)
1837 L(fwd_write_124bytes):
1838 lddqu -124(%rsi), %xmm0
1839 movdqu %xmm0, -124(%rdi)
1840 L(fwd_write_108bytes):
1841 lddqu -108(%rsi), %xmm0
1842 movdqu %xmm0, -108(%rdi)
1843 L(fwd_write_92bytes):
1844 lddqu -92(%rsi), %xmm0
1845 movdqu %xmm0, -92(%rdi)
1846 L(fwd_write_76bytes):
1847 lddqu -76(%rsi), %xmm0
1848 movdqu %xmm0, -76(%rdi)
1849 L(fwd_write_60bytes):
1850 lddqu -60(%rsi), %xmm0
1851 movdqu %xmm0, -60(%rdi)
1852 L(fwd_write_44bytes):
1853 lddqu -44(%rsi), %xmm0
1854 movdqu %xmm0, -44(%rdi)
1855 L(fwd_write_28bytes):
1856 lddqu -28(%rsi), %xmm0
1857 lddqu -16(%rsi), %xmm1
1858 movdqu %xmm0, -28(%rdi)
1859 movdqu %xmm1, -16(%rdi)
1860 ret
1861
1862 .p2align 4
/* Copy the 12 bytes ending at %rsi to the 12 bytes ending at %rdi with
   one 8-byte move followed by one adjacent 4-byte move (no overlap).
   Both loads happen before either store.  Clobbers %rdx and %rcx.  */
1863 L(fwd_write_12bytes):
1864 mov -12(%rsi), %rdx
1865 mov -4(%rsi), %ecx
1866 mov %rdx, -12(%rdi)
1867 mov %ecx, -4(%rdi)
1868 ret
1869
1870 .p2align 4
/* Forward tail copy for lengths N = 16*k + 11, 27 <= N <= 139.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_27bytes.  The last two chunks overlap by 5 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1871 L(fwd_write_139bytes):
1872 lddqu -139(%rsi), %xmm0
1873 movdqu %xmm0, -139(%rdi)
1874 L(fwd_write_123bytes):
1875 lddqu -123(%rsi), %xmm0
1876 movdqu %xmm0, -123(%rdi)
1877 L(fwd_write_107bytes):
1878 lddqu -107(%rsi), %xmm0
1879 movdqu %xmm0, -107(%rdi)
1880 L(fwd_write_91bytes):
1881 lddqu -91(%rsi), %xmm0
1882 movdqu %xmm0, -91(%rdi)
1883 L(fwd_write_75bytes):
1884 lddqu -75(%rsi), %xmm0
1885 movdqu %xmm0, -75(%rdi)
1886 L(fwd_write_59bytes):
1887 lddqu -59(%rsi), %xmm0
1888 movdqu %xmm0, -59(%rdi)
1889 L(fwd_write_43bytes):
1890 lddqu -43(%rsi), %xmm0
1891 movdqu %xmm0, -43(%rdi)
1892 L(fwd_write_27bytes):
1893 lddqu -27(%rsi), %xmm0
1894 lddqu -16(%rsi), %xmm1
1895 movdqu %xmm0, -27(%rdi)
1896 movdqu %xmm1, -16(%rdi)
1897 ret
1898
1899 .p2align 4
/* Copy the 11 bytes ending at %rsi to the 11 bytes ending at %rdi with
   an 8-byte move and a 4-byte move that overlap by 1 byte.  Both loads
   happen before either store.  Clobbers %rdx and %rcx.  */
1900 L(fwd_write_11bytes):
1901 mov -11(%rsi), %rdx
1902 mov -4(%rsi), %ecx
1903 mov %rdx, -11(%rdi)
1904 mov %ecx, -4(%rdi)
1905 ret
1906
1907 .p2align 4
/* Forward tail copy for lengths N = 16*k + 10, 26 <= N <= 138.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_26bytes.  The last two chunks overlap by 6 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1908 L(fwd_write_138bytes):
1909 lddqu -138(%rsi), %xmm0
1910 movdqu %xmm0, -138(%rdi)
1911 L(fwd_write_122bytes):
1912 lddqu -122(%rsi), %xmm0
1913 movdqu %xmm0, -122(%rdi)
1914 L(fwd_write_106bytes):
1915 lddqu -106(%rsi), %xmm0
1916 movdqu %xmm0, -106(%rdi)
1917 L(fwd_write_90bytes):
1918 lddqu -90(%rsi), %xmm0
1919 movdqu %xmm0, -90(%rdi)
1920 L(fwd_write_74bytes):
1921 lddqu -74(%rsi), %xmm0
1922 movdqu %xmm0, -74(%rdi)
1923 L(fwd_write_58bytes):
1924 lddqu -58(%rsi), %xmm0
1925 movdqu %xmm0, -58(%rdi)
1926 L(fwd_write_42bytes):
1927 lddqu -42(%rsi), %xmm0
1928 movdqu %xmm0, -42(%rdi)
1929 L(fwd_write_26bytes):
1930 lddqu -26(%rsi), %xmm0
1931 lddqu -16(%rsi), %xmm1
1932 movdqu %xmm0, -26(%rdi)
1933 movdqu %xmm1, -16(%rdi)
1934 ret
1935
1936 .p2align 4
/* Copy the 10 bytes ending at %rsi to the 10 bytes ending at %rdi with
   an 8-byte move and a 4-byte move that overlap by 2 bytes.  Both
   loads happen before either store.  Clobbers %rdx and %rcx.  */
1937 L(fwd_write_10bytes):
1938 mov -10(%rsi), %rdx
1939 mov -4(%rsi), %ecx
1940 mov %rdx, -10(%rdi)
1941 mov %ecx, -4(%rdi)
1942 ret
1943
1944 .p2align 4
/* Forward tail copy for lengths N = 16*k + 9, 25 <= N <= 137.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_25bytes.  The last two chunks overlap by 7 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1945 L(fwd_write_137bytes):
1946 lddqu -137(%rsi), %xmm0
1947 movdqu %xmm0, -137(%rdi)
1948 L(fwd_write_121bytes):
1949 lddqu -121(%rsi), %xmm0
1950 movdqu %xmm0, -121(%rdi)
1951 L(fwd_write_105bytes):
1952 lddqu -105(%rsi), %xmm0
1953 movdqu %xmm0, -105(%rdi)
1954 L(fwd_write_89bytes):
1955 lddqu -89(%rsi), %xmm0
1956 movdqu %xmm0, -89(%rdi)
1957 L(fwd_write_73bytes):
1958 lddqu -73(%rsi), %xmm0
1959 movdqu %xmm0, -73(%rdi)
1960 L(fwd_write_57bytes):
1961 lddqu -57(%rsi), %xmm0
1962 movdqu %xmm0, -57(%rdi)
1963 L(fwd_write_41bytes):
1964 lddqu -41(%rsi), %xmm0
1965 movdqu %xmm0, -41(%rdi)
1966 L(fwd_write_25bytes):
1967 lddqu -25(%rsi), %xmm0
1968 lddqu -16(%rsi), %xmm1
1969 movdqu %xmm0, -25(%rdi)
1970 movdqu %xmm1, -16(%rdi)
1971 ret
1972
1973 .p2align 4
/* Copy the 9 bytes ending at %rsi to the 9 bytes ending at %rdi with
   an 8-byte move and a 4-byte move that overlap by 3 bytes.  Both
   loads happen before either store.  Clobbers %rdx and %rcx.  */
1974 L(fwd_write_9bytes):
1975 mov -9(%rsi), %rdx
1976 mov -4(%rsi), %ecx
1977 mov %rdx, -9(%rdi)
1978 mov %ecx, -4(%rdi)
1979 ret
1980
1981 .p2align 4
/* Forward tail copy for lengths N = 16*k + 8, 24 <= N <= 136.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_24bytes.  The last two chunks overlap by 8 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
1982 L(fwd_write_136bytes):
1983 lddqu -136(%rsi), %xmm0
1984 movdqu %xmm0, -136(%rdi)
1985 L(fwd_write_120bytes):
1986 lddqu -120(%rsi), %xmm0
1987 movdqu %xmm0, -120(%rdi)
1988 L(fwd_write_104bytes):
1989 lddqu -104(%rsi), %xmm0
1990 movdqu %xmm0, -104(%rdi)
1991 L(fwd_write_88bytes):
1992 lddqu -88(%rsi), %xmm0
1993 movdqu %xmm0, -88(%rdi)
1994 L(fwd_write_72bytes):
1995 lddqu -72(%rsi), %xmm0
1996 movdqu %xmm0, -72(%rdi)
1997 L(fwd_write_56bytes):
1998 lddqu -56(%rsi), %xmm0
1999 movdqu %xmm0, -56(%rdi)
2000 L(fwd_write_40bytes):
2001 lddqu -40(%rsi), %xmm0
2002 movdqu %xmm0, -40(%rdi)
2003 L(fwd_write_24bytes):
2004 lddqu -24(%rsi), %xmm0
2005 lddqu -16(%rsi), %xmm1
2006 movdqu %xmm0, -24(%rdi)
2007 movdqu %xmm1, -16(%rdi)
2008 ret
2009
2010 .p2align 4
/* Copy the 8 bytes ending at %rsi to the 8 bytes ending at %rdi with a
   single 8-byte move.  Clobbers %rdx.  */
2011 L(fwd_write_8bytes):
2012 mov -8(%rsi), %rdx
2013 mov %rdx, -8(%rdi)
2014 ret
2015
2016 .p2align 4
/* Forward tail copy for lengths N = 16*k + 7, 23 <= N <= 135.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_23bytes.  The last two chunks overlap by 9 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2017 L(fwd_write_135bytes):
2018 lddqu -135(%rsi), %xmm0
2019 movdqu %xmm0, -135(%rdi)
2020 L(fwd_write_119bytes):
2021 lddqu -119(%rsi), %xmm0
2022 movdqu %xmm0, -119(%rdi)
2023 L(fwd_write_103bytes):
2024 lddqu -103(%rsi), %xmm0
2025 movdqu %xmm0, -103(%rdi)
2026 L(fwd_write_87bytes):
2027 lddqu -87(%rsi), %xmm0
2028 movdqu %xmm0, -87(%rdi)
2029 L(fwd_write_71bytes):
2030 lddqu -71(%rsi), %xmm0
2031 movdqu %xmm0, -71(%rdi)
2032 L(fwd_write_55bytes):
2033 lddqu -55(%rsi), %xmm0
2034 movdqu %xmm0, -55(%rdi)
2035 L(fwd_write_39bytes):
2036 lddqu -39(%rsi), %xmm0
2037 movdqu %xmm0, -39(%rdi)
2038 L(fwd_write_23bytes):
2039 lddqu -23(%rsi), %xmm0
2040 lddqu -16(%rsi), %xmm1
2041 movdqu %xmm0, -23(%rdi)
2042 movdqu %xmm1, -16(%rdi)
2043 ret
2044
2045 .p2align 4
/* Copy the 7 bytes ending at %rsi to the 7 bytes ending at %rdi with
   two 4-byte moves that overlap by 1 byte.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
2046 L(fwd_write_7bytes):
2047 mov -7(%rsi), %edx
2048 mov -4(%rsi), %ecx
2049 mov %edx, -7(%rdi)
2050 mov %ecx, -4(%rdi)
2051 ret
2052
2053 .p2align 4
/* Forward tail copy for lengths N = 16*k + 6, 22 <= N <= 134.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_22bytes.  The last two chunks overlap by 10 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2054 L(fwd_write_134bytes):
2055 lddqu -134(%rsi), %xmm0
2056 movdqu %xmm0, -134(%rdi)
2057 L(fwd_write_118bytes):
2058 lddqu -118(%rsi), %xmm0
2059 movdqu %xmm0, -118(%rdi)
2060 L(fwd_write_102bytes):
2061 lddqu -102(%rsi), %xmm0
2062 movdqu %xmm0, -102(%rdi)
2063 L(fwd_write_86bytes):
2064 lddqu -86(%rsi), %xmm0
2065 movdqu %xmm0, -86(%rdi)
2066 L(fwd_write_70bytes):
2067 lddqu -70(%rsi), %xmm0
2068 movdqu %xmm0, -70(%rdi)
2069 L(fwd_write_54bytes):
2070 lddqu -54(%rsi), %xmm0
2071 movdqu %xmm0, -54(%rdi)
2072 L(fwd_write_38bytes):
2073 lddqu -38(%rsi), %xmm0
2074 movdqu %xmm0, -38(%rdi)
2075 L(fwd_write_22bytes):
2076 lddqu -22(%rsi), %xmm0
2077 lddqu -16(%rsi), %xmm1
2078 movdqu %xmm0, -22(%rdi)
2079 movdqu %xmm1, -16(%rdi)
2080 ret
2081
2082 .p2align 4
/* Copy the 6 bytes ending at %rsi to the 6 bytes ending at %rdi with
   two 4-byte moves that overlap by 2 bytes.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
2083 L(fwd_write_6bytes):
2084 mov -6(%rsi), %edx
2085 mov -4(%rsi), %ecx
2086 mov %edx, -6(%rdi)
2087 mov %ecx, -4(%rdi)
2088 ret
2089
2090 .p2align 4
/* Forward tail copy for lengths N = 16*k + 5, 21 <= N <= 133.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_21bytes.  The last two chunks overlap by 11 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2091 L(fwd_write_133bytes):
2092 lddqu -133(%rsi), %xmm0
2093 movdqu %xmm0, -133(%rdi)
2094 L(fwd_write_117bytes):
2095 lddqu -117(%rsi), %xmm0
2096 movdqu %xmm0, -117(%rdi)
2097 L(fwd_write_101bytes):
2098 lddqu -101(%rsi), %xmm0
2099 movdqu %xmm0, -101(%rdi)
2100 L(fwd_write_85bytes):
2101 lddqu -85(%rsi), %xmm0
2102 movdqu %xmm0, -85(%rdi)
2103 L(fwd_write_69bytes):
2104 lddqu -69(%rsi), %xmm0
2105 movdqu %xmm0, -69(%rdi)
2106 L(fwd_write_53bytes):
2107 lddqu -53(%rsi), %xmm0
2108 movdqu %xmm0, -53(%rdi)
2109 L(fwd_write_37bytes):
2110 lddqu -37(%rsi), %xmm0
2111 movdqu %xmm0, -37(%rdi)
2112 L(fwd_write_21bytes):
2113 lddqu -21(%rsi), %xmm0
2114 lddqu -16(%rsi), %xmm1
2115 movdqu %xmm0, -21(%rdi)
2116 movdqu %xmm1, -16(%rdi)
2117 ret
2118
2119 .p2align 4
/* Copy the 5 bytes ending at %rsi to the 5 bytes ending at %rdi with
   two 4-byte moves that overlap by 3 bytes.  Both loads happen before
   either store.  Clobbers %rdx and %rcx.  */
2120 L(fwd_write_5bytes):
2121 mov -5(%rsi), %edx
2122 mov -4(%rsi), %ecx
2123 mov %edx, -5(%rdi)
2124 mov %ecx, -4(%rdi)
2125 ret
2126
2127 .p2align 4
/* Forward tail copy for lengths N = 16*k + 4, 20 <= N <= 132.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_20bytes.  The last two chunks overlap by 12 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2128 L(fwd_write_132bytes):
2129 lddqu -132(%rsi), %xmm0
2130 movdqu %xmm0, -132(%rdi)
2131 L(fwd_write_116bytes):
2132 lddqu -116(%rsi), %xmm0
2133 movdqu %xmm0, -116(%rdi)
2134 L(fwd_write_100bytes):
2135 lddqu -100(%rsi), %xmm0
2136 movdqu %xmm0, -100(%rdi)
2137 L(fwd_write_84bytes):
2138 lddqu -84(%rsi), %xmm0
2139 movdqu %xmm0, -84(%rdi)
2140 L(fwd_write_68bytes):
2141 lddqu -68(%rsi), %xmm0
2142 movdqu %xmm0, -68(%rdi)
2143 L(fwd_write_52bytes):
2144 lddqu -52(%rsi), %xmm0
2145 movdqu %xmm0, -52(%rdi)
2146 L(fwd_write_36bytes):
2147 lddqu -36(%rsi), %xmm0
2148 movdqu %xmm0, -36(%rdi)
2149 L(fwd_write_20bytes):
2150 lddqu -20(%rsi), %xmm0
2151 lddqu -16(%rsi), %xmm1
2152 movdqu %xmm0, -20(%rdi)
2153 movdqu %xmm1, -16(%rdi)
2154 ret
2155
2156 .p2align 4
/* Copy the 4 bytes ending at %rsi to the 4 bytes ending at %rdi with a
   single 4-byte move.  Clobbers %rdx.  */
2157 L(fwd_write_4bytes):
2158 mov -4(%rsi), %edx
2159 mov %edx, -4(%rdi)
2160 ret
2161
2162 .p2align 4
/* Forward tail copy for lengths N = 16*k + 3, 19 <= N <= 131.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_19bytes.  The last two chunks overlap by 13 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2163 L(fwd_write_131bytes):
2164 lddqu -131(%rsi), %xmm0
2165 movdqu %xmm0, -131(%rdi)
2166 L(fwd_write_115bytes):
2167 lddqu -115(%rsi), %xmm0
2168 movdqu %xmm0, -115(%rdi)
2169 L(fwd_write_99bytes):
2170 lddqu -99(%rsi), %xmm0
2171 movdqu %xmm0, -99(%rdi)
2172 L(fwd_write_83bytes):
2173 lddqu -83(%rsi), %xmm0
2174 movdqu %xmm0, -83(%rdi)
2175 L(fwd_write_67bytes):
2176 lddqu -67(%rsi), %xmm0
2177 movdqu %xmm0, -67(%rdi)
2178 L(fwd_write_51bytes):
2179 lddqu -51(%rsi), %xmm0
2180 movdqu %xmm0, -51(%rdi)
2181 L(fwd_write_35bytes):
2182 lddqu -35(%rsi), %xmm0
2183 movdqu %xmm0, -35(%rdi)
2184 L(fwd_write_19bytes):
2185 lddqu -19(%rsi), %xmm0
2186 lddqu -16(%rsi), %xmm1
2187 movdqu %xmm0, -19(%rdi)
2188 movdqu %xmm1, -16(%rdi)
2189 ret
2190
2191 .p2align 4
/* Copy the 3 bytes ending at %rsi to the 3 bytes ending at %rdi with
   two 2-byte moves that overlap by 1 byte.  Both loads happen before
   either store.  Writes the low 16 bits of %rdx and %rcx.  */
2192 L(fwd_write_3bytes):
2193 mov -3(%rsi), %dx
2194 mov -2(%rsi), %cx
2195 mov %dx, -3(%rdi)
2196 mov %cx, -2(%rdi)
2197 ret
2198
2199 .p2align 4
/* Forward tail copy for lengths N = 16*k + 2, 18 <= N <= 130.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_18bytes.  The last two chunks overlap by 14 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2200 L(fwd_write_130bytes):
2201 lddqu -130(%rsi), %xmm0
2202 movdqu %xmm0, -130(%rdi)
2203 L(fwd_write_114bytes):
2204 lddqu -114(%rsi), %xmm0
2205 movdqu %xmm0, -114(%rdi)
2206 L(fwd_write_98bytes):
2207 lddqu -98(%rsi), %xmm0
2208 movdqu %xmm0, -98(%rdi)
2209 L(fwd_write_82bytes):
2210 lddqu -82(%rsi), %xmm0
2211 movdqu %xmm0, -82(%rdi)
2212 L(fwd_write_66bytes):
2213 lddqu -66(%rsi), %xmm0
2214 movdqu %xmm0, -66(%rdi)
2215 L(fwd_write_50bytes):
2216 lddqu -50(%rsi), %xmm0
2217 movdqu %xmm0, -50(%rdi)
2218 L(fwd_write_34bytes):
2219 lddqu -34(%rsi), %xmm0
2220 movdqu %xmm0, -34(%rdi)
2221 L(fwd_write_18bytes):
2222 lddqu -18(%rsi), %xmm0
2223 lddqu -16(%rsi), %xmm1
2224 movdqu %xmm0, -18(%rdi)
2225 movdqu %xmm1, -16(%rdi)
2226 ret
2227
2228 .p2align 4
/* Copy the 2 bytes ending at %rsi to the 2 bytes ending at %rdi.  The
   load zero-extends into %edx; only %dx is stored.  Clobbers %rdx.  */
2229 L(fwd_write_2bytes):
2230 movzwl -2(%rsi), %edx
2231 mov %dx, -2(%rdi)
2232 ret
2233
2234 .p2align 4
/* Forward tail copy for lengths N = 16*k + 1, 17 <= N <= 129.  %rsi
   and %rdi point just past the end of the data.  Entering at
   fwd_write_<N>bytes copies [%rsi - N, %rsi) to [%rdi - N, %rdi), one
   unaligned 16-byte chunk per label, falling through to
   fwd_write_17bytes.  The last two chunks overlap by 15 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2235 L(fwd_write_129bytes):
2236 lddqu -129(%rsi), %xmm0
2237 movdqu %xmm0, -129(%rdi)
2238 L(fwd_write_113bytes):
2239 lddqu -113(%rsi), %xmm0
2240 movdqu %xmm0, -113(%rdi)
2241 L(fwd_write_97bytes):
2242 lddqu -97(%rsi), %xmm0
2243 movdqu %xmm0, -97(%rdi)
2244 L(fwd_write_81bytes):
2245 lddqu -81(%rsi), %xmm0
2246 movdqu %xmm0, -81(%rdi)
2247 L(fwd_write_65bytes):
2248 lddqu -65(%rsi), %xmm0
2249 movdqu %xmm0, -65(%rdi)
2250 L(fwd_write_49bytes):
2251 lddqu -49(%rsi), %xmm0
2252 movdqu %xmm0, -49(%rdi)
2253 L(fwd_write_33bytes):
2254 lddqu -33(%rsi), %xmm0
2255 movdqu %xmm0, -33(%rdi)
2256 L(fwd_write_17bytes):
2257 lddqu -17(%rsi), %xmm0
2258 lddqu -16(%rsi), %xmm1
2259 movdqu %xmm0, -17(%rdi)
2260 movdqu %xmm1, -16(%rdi)
2261 ret
2262
2263 .p2align 4
/* Copy the single byte just below %rsi to the byte just below %rdi.
   The load zero-extends into %edx; only %dl is stored.  Clobbers
   %rdx.  */
2264 L(fwd_write_1bytes):
2265 movzbl -1(%rsi), %edx
2266 mov %dl, -1(%rdi)
2267 ret
2268
2269 .p2align 4
/* Backward tail copy for lengths that are a multiple of 16 (0..128).
   %rsi and %rdi point at the start of the bytes still to copy, so
   every offset is non-negative.  Entering at bwd_write_<N>bytes copies
   [%rsi, %rsi + N) to [%rdi, %rdi + N) from the highest 16-byte chunk
   downwards, one unaligned load/store per label, falling through to
   the ret at bwd_write_0bytes.  Clobbers %xmm0.  */
2270 L(bwd_write_128bytes):
2271 lddqu 112(%rsi), %xmm0
2272 movdqu %xmm0, 112(%rdi)
2273 L(bwd_write_112bytes):
2274 lddqu 96(%rsi), %xmm0
2275 movdqu %xmm0, 96(%rdi)
2276 L(bwd_write_96bytes):
2277 lddqu 80(%rsi), %xmm0
2278 movdqu %xmm0, 80(%rdi)
2279 L(bwd_write_80bytes):
2280 lddqu 64(%rsi), %xmm0
2281 movdqu %xmm0, 64(%rdi)
2282 L(bwd_write_64bytes):
2283 lddqu 48(%rsi), %xmm0
2284 movdqu %xmm0, 48(%rdi)
2285 L(bwd_write_48bytes):
2286 lddqu 32(%rsi), %xmm0
2287 movdqu %xmm0, 32(%rdi)
2288 L(bwd_write_32bytes):
2289 lddqu 16(%rsi), %xmm0
2290 movdqu %xmm0, 16(%rdi)
2291 L(bwd_write_16bytes):
2292 lddqu (%rsi), %xmm0
2293 movdqu %xmm0, (%rdi)
2294 L(bwd_write_0bytes):
2295 ret
2296
2297 .p2align 4
/* Backward tail copy for lengths N = 16*k + 15, 31 <= N <= 143.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_31bytes.  The last two chunks overlap by 1 byte and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2298 L(bwd_write_143bytes):
2299 lddqu 127(%rsi), %xmm0
2300 movdqu %xmm0, 127(%rdi)
2301 L(bwd_write_127bytes):
2302 lddqu 111(%rsi), %xmm0
2303 movdqu %xmm0, 111(%rdi)
2304 L(bwd_write_111bytes):
2305 lddqu 95(%rsi), %xmm0
2306 movdqu %xmm0, 95(%rdi)
2307 L(bwd_write_95bytes):
2308 lddqu 79(%rsi), %xmm0
2309 movdqu %xmm0, 79(%rdi)
2310 L(bwd_write_79bytes):
2311 lddqu 63(%rsi), %xmm0
2312 movdqu %xmm0, 63(%rdi)
2313 L(bwd_write_63bytes):
2314 lddqu 47(%rsi), %xmm0
2315 movdqu %xmm0, 47(%rdi)
2316 L(bwd_write_47bytes):
2317 lddqu 31(%rsi), %xmm0
2318 movdqu %xmm0, 31(%rdi)
2319 L(bwd_write_31bytes):
2320 lddqu 15(%rsi), %xmm0
2321 lddqu (%rsi), %xmm1
2322 movdqu %xmm0, 15(%rdi)
2323 movdqu %xmm1, (%rdi)
2324 ret
2325
2326
2327 .p2align 4
/* Copy the 15 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 1 byte.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2328 L(bwd_write_15bytes):
2329 mov 7(%rsi), %rdx
2330 mov (%rsi), %rcx
2331 mov %rdx, 7(%rdi)
2332 mov %rcx, (%rdi)
2333 ret
2334
2335 .p2align 4
/* Backward tail copy for lengths N = 16*k + 14, 30 <= N <= 142.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_30bytes.  The last two chunks overlap by 2 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2336 L(bwd_write_142bytes):
2337 lddqu 126(%rsi), %xmm0
2338 movdqu %xmm0, 126(%rdi)
2339 L(bwd_write_126bytes):
2340 lddqu 110(%rsi), %xmm0
2341 movdqu %xmm0, 110(%rdi)
2342 L(bwd_write_110bytes):
2343 lddqu 94(%rsi), %xmm0
2344 movdqu %xmm0, 94(%rdi)
2345 L(bwd_write_94bytes):
2346 lddqu 78(%rsi), %xmm0
2347 movdqu %xmm0, 78(%rdi)
2348 L(bwd_write_78bytes):
2349 lddqu 62(%rsi), %xmm0
2350 movdqu %xmm0, 62(%rdi)
2351 L(bwd_write_62bytes):
2352 lddqu 46(%rsi), %xmm0
2353 movdqu %xmm0, 46(%rdi)
2354 L(bwd_write_46bytes):
2355 lddqu 30(%rsi), %xmm0
2356 movdqu %xmm0, 30(%rdi)
2357 L(bwd_write_30bytes):
2358 lddqu 14(%rsi), %xmm0
2359 lddqu (%rsi), %xmm1
2360 movdqu %xmm0, 14(%rdi)
2361 movdqu %xmm1, (%rdi)
2362 ret
2363
2364 .p2align 4
/* Copy the 14 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 2 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2365 L(bwd_write_14bytes):
2366 mov 6(%rsi), %rdx
2367 mov (%rsi), %rcx
2368 mov %rdx, 6(%rdi)
2369 mov %rcx, (%rdi)
2370 ret
2371
2372 .p2align 4
/* Backward tail copy for lengths N = 16*k + 13, 29 <= N <= 141.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_29bytes.  The last two chunks overlap by 3 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2373 L(bwd_write_141bytes):
2374 lddqu 125(%rsi), %xmm0
2375 movdqu %xmm0, 125(%rdi)
2376 L(bwd_write_125bytes):
2377 lddqu 109(%rsi), %xmm0
2378 movdqu %xmm0, 109(%rdi)
2379 L(bwd_write_109bytes):
2380 lddqu 93(%rsi), %xmm0
2381 movdqu %xmm0, 93(%rdi)
2382 L(bwd_write_93bytes):
2383 lddqu 77(%rsi), %xmm0
2384 movdqu %xmm0, 77(%rdi)
2385 L(bwd_write_77bytes):
2386 lddqu 61(%rsi), %xmm0
2387 movdqu %xmm0, 61(%rdi)
2388 L(bwd_write_61bytes):
2389 lddqu 45(%rsi), %xmm0
2390 movdqu %xmm0, 45(%rdi)
2391 L(bwd_write_45bytes):
2392 lddqu 29(%rsi), %xmm0
2393 movdqu %xmm0, 29(%rdi)
2394 L(bwd_write_29bytes):
2395 lddqu 13(%rsi), %xmm0
2396 lddqu (%rsi), %xmm1
2397 movdqu %xmm0, 13(%rdi)
2398 movdqu %xmm1, (%rdi)
2399 ret
2400
2401 .p2align 4
/* Copy the 13 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 3 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2402 L(bwd_write_13bytes):
2403 mov 5(%rsi), %rdx
2404 mov (%rsi), %rcx
2405 mov %rdx, 5(%rdi)
2406 mov %rcx, (%rdi)
2407 ret
2408
2409 .p2align 4
/* Backward tail copy for lengths N = 16*k + 12, 28 <= N <= 140.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_28bytes.  The last two chunks overlap by 4 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2410 L(bwd_write_140bytes):
2411 lddqu 124(%rsi), %xmm0
2412 movdqu %xmm0, 124(%rdi)
2413 L(bwd_write_124bytes):
2414 lddqu 108(%rsi), %xmm0
2415 movdqu %xmm0, 108(%rdi)
2416 L(bwd_write_108bytes):
2417 lddqu 92(%rsi), %xmm0
2418 movdqu %xmm0, 92(%rdi)
2419 L(bwd_write_92bytes):
2420 lddqu 76(%rsi), %xmm0
2421 movdqu %xmm0, 76(%rdi)
2422 L(bwd_write_76bytes):
2423 lddqu 60(%rsi), %xmm0
2424 movdqu %xmm0, 60(%rdi)
2425 L(bwd_write_60bytes):
2426 lddqu 44(%rsi), %xmm0
2427 movdqu %xmm0, 44(%rdi)
2428 L(bwd_write_44bytes):
2429 lddqu 28(%rsi), %xmm0
2430 movdqu %xmm0, 28(%rdi)
2431 L(bwd_write_28bytes):
2432 lddqu 12(%rsi), %xmm0
2433 lddqu (%rsi), %xmm1
2434 movdqu %xmm0, 12(%rdi)
2435 movdqu %xmm1, (%rdi)
2436 ret
2437
2438 .p2align 4
/* Copy the 12 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 4 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2439 L(bwd_write_12bytes):
2440 mov 4(%rsi), %rdx
2441 mov (%rsi), %rcx
2442 mov %rdx, 4(%rdi)
2443 mov %rcx, (%rdi)
2444 ret
2445
2446 .p2align 4
/* Backward tail copy for lengths N = 16*k + 11, 27 <= N <= 139.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_27bytes.  The last two chunks overlap by 5 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2447 L(bwd_write_139bytes):
2448 lddqu 123(%rsi), %xmm0
2449 movdqu %xmm0, 123(%rdi)
2450 L(bwd_write_123bytes):
2451 lddqu 107(%rsi), %xmm0
2452 movdqu %xmm0, 107(%rdi)
2453 L(bwd_write_107bytes):
2454 lddqu 91(%rsi), %xmm0
2455 movdqu %xmm0, 91(%rdi)
2456 L(bwd_write_91bytes):
2457 lddqu 75(%rsi), %xmm0
2458 movdqu %xmm0, 75(%rdi)
2459 L(bwd_write_75bytes):
2460 lddqu 59(%rsi), %xmm0
2461 movdqu %xmm0, 59(%rdi)
2462 L(bwd_write_59bytes):
2463 lddqu 43(%rsi), %xmm0
2464 movdqu %xmm0, 43(%rdi)
2465 L(bwd_write_43bytes):
2466 lddqu 27(%rsi), %xmm0
2467 movdqu %xmm0, 27(%rdi)
2468 L(bwd_write_27bytes):
2469 lddqu 11(%rsi), %xmm0
2470 lddqu (%rsi), %xmm1
2471 movdqu %xmm0, 11(%rdi)
2472 movdqu %xmm1, (%rdi)
2473 ret
2474
2475 .p2align 4
/* Copy the 11 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 5 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2476 L(bwd_write_11bytes):
2477 mov 3(%rsi), %rdx
2478 mov (%rsi), %rcx
2479 mov %rdx, 3(%rdi)
2480 mov %rcx, (%rdi)
2481 ret
2482
2483 .p2align 4
/* Backward tail copy for lengths N = 16*k + 10, 26 <= N <= 138.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_26bytes.  The last two chunks overlap by 6 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2484 L(bwd_write_138bytes):
2485 lddqu 122(%rsi), %xmm0
2486 movdqu %xmm0, 122(%rdi)
2487 L(bwd_write_122bytes):
2488 lddqu 106(%rsi), %xmm0
2489 movdqu %xmm0, 106(%rdi)
2490 L(bwd_write_106bytes):
2491 lddqu 90(%rsi), %xmm0
2492 movdqu %xmm0, 90(%rdi)
2493 L(bwd_write_90bytes):
2494 lddqu 74(%rsi), %xmm0
2495 movdqu %xmm0, 74(%rdi)
2496 L(bwd_write_74bytes):
2497 lddqu 58(%rsi), %xmm0
2498 movdqu %xmm0, 58(%rdi)
2499 L(bwd_write_58bytes):
2500 lddqu 42(%rsi), %xmm0
2501 movdqu %xmm0, 42(%rdi)
2502 L(bwd_write_42bytes):
2503 lddqu 26(%rsi), %xmm0
2504 movdqu %xmm0, 26(%rdi)
2505 L(bwd_write_26bytes):
2506 lddqu 10(%rsi), %xmm0
2507 lddqu (%rsi), %xmm1
2508 movdqu %xmm0, 10(%rdi)
2509 movdqu %xmm1, (%rdi)
2510 ret
2511
2512 .p2align 4
/* Copy the 10 bytes starting at %rsi to %rdi with two 8-byte moves
   that overlap by 6 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2513 L(bwd_write_10bytes):
2514 mov 2(%rsi), %rdx
2515 mov (%rsi), %rcx
2516 mov %rdx, 2(%rdi)
2517 mov %rcx, (%rdi)
2518 ret
2519
2520 .p2align 4
/* Backward tail copy for lengths N = 16*k + 9, 25 <= N <= 137.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_25bytes.  The last two chunks overlap by 7 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2521 L(bwd_write_137bytes):
2522 lddqu 121(%rsi), %xmm0
2523 movdqu %xmm0, 121(%rdi)
2524 L(bwd_write_121bytes):
2525 lddqu 105(%rsi), %xmm0
2526 movdqu %xmm0, 105(%rdi)
2527 L(bwd_write_105bytes):
2528 lddqu 89(%rsi), %xmm0
2529 movdqu %xmm0, 89(%rdi)
2530 L(bwd_write_89bytes):
2531 lddqu 73(%rsi), %xmm0
2532 movdqu %xmm0, 73(%rdi)
2533 L(bwd_write_73bytes):
2534 lddqu 57(%rsi), %xmm0
2535 movdqu %xmm0, 57(%rdi)
2536 L(bwd_write_57bytes):
2537 lddqu 41(%rsi), %xmm0
2538 movdqu %xmm0, 41(%rdi)
2539 L(bwd_write_41bytes):
2540 lddqu 25(%rsi), %xmm0
2541 movdqu %xmm0, 25(%rdi)
2542 L(bwd_write_25bytes):
2543 lddqu 9(%rsi), %xmm0
2544 lddqu (%rsi), %xmm1
2545 movdqu %xmm0, 9(%rdi)
2546 movdqu %xmm1, (%rdi)
2547 ret
2548
2549 .p2align 4
/* Copy the 9 bytes starting at %rsi to %rdi with two 8-byte moves that
   overlap by 7 bytes.  Both loads happen before either store.
   Clobbers %rdx and %rcx.  */
2550 L(bwd_write_9bytes):
2551 mov 1(%rsi), %rdx
2552 mov (%rsi), %rcx
2553 mov %rdx, 1(%rdi)
2554 mov %rcx, (%rdi)
2555 ret
2556
2557 .p2align 4
/* Backward tail copy for lengths N = 16*k + 8, 24 <= N <= 136.  %rsi
   and %rdi point at the start of the data.  Entering at
   bwd_write_<N>bytes copies [%rsi, %rsi + N) to [%rdi, %rdi + N) from
   the highest 16-byte chunk downwards, falling through to
   bwd_write_24bytes.  The last two chunks overlap by 8 bytes and are
   both loaded before either is stored.  Clobbers %xmm0 and %xmm1.  */
2558 L(bwd_write_136bytes):
2559 lddqu 120(%rsi), %xmm0
2560 movdqu %xmm0, 120(%rdi)
2561 L(bwd_write_120bytes):
2562 lddqu 104(%rsi), %xmm0
2563 movdqu %xmm0, 104(%rdi)
2564 L(bwd_write_104bytes):
2565 lddqu 88(%rsi), %xmm0
2566 movdqu %xmm0, 88(%rdi)
2567 L(bwd_write_88bytes):
2568 lddqu 72(%rsi), %xmm0
2569 movdqu %xmm0, 72(%rdi)
2570 L(bwd_write_72bytes):
2571 lddqu 56(%rsi), %xmm0
2572 movdqu %xmm0, 56(%rdi)
2573 L(bwd_write_56bytes):
2574 lddqu 40(%rsi), %xmm0
2575 movdqu %xmm0, 40(%rdi)
2576 L(bwd_write_40bytes):
2577 lddqu 24(%rsi), %xmm0
2578 movdqu %xmm0, 24(%rdi)
2579 L(bwd_write_24bytes):
2580 lddqu 8(%rsi), %xmm0
2581 lddqu (%rsi), %xmm1
2582 movdqu %xmm0, 8(%rdi)
2583 movdqu %xmm1, (%rdi)
2584 ret
2585
2586 .p2align 4
/* Copy exactly 8 bytes with a single 64-bit load/store pair through
   %rdx.  Entry 8 of L(table_144_bytes_bwd).  */
2587 L(bwd_write_8bytes):
2588 mov (%rsi), %rdx
2589 mov %rdx, (%rdi)
2590 ret
2591
2592 .p2align 4
/* Tail copy for remaining lengths 135, 119, ..., 39, 23 (== 7 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2593 L(bwd_write_135bytes):
2594 lddqu 119(%rsi), %xmm0
2595 movdqu %xmm0, 119(%rdi)
2596 L(bwd_write_119bytes):
2597 lddqu 103(%rsi), %xmm0
2598 movdqu %xmm0, 103(%rdi)
2599 L(bwd_write_103bytes):
2600 lddqu 87(%rsi), %xmm0
2601 movdqu %xmm0, 87(%rdi)
2602 L(bwd_write_87bytes):
2603 lddqu 71(%rsi), %xmm0
2604 movdqu %xmm0, 71(%rdi)
2605 L(bwd_write_71bytes):
2606 lddqu 55(%rsi), %xmm0
2607 movdqu %xmm0, 55(%rdi)
2608 L(bwd_write_55bytes):
2609 lddqu 39(%rsi), %xmm0
2610 movdqu %xmm0, 39(%rdi)
2611 L(bwd_write_39bytes):
2612 lddqu 23(%rsi), %xmm0
2613 movdqu %xmm0, 23(%rdi)
2614 L(bwd_write_23bytes):
/* The last two chunks overlap (bytes 7..22 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2615 lddqu 7(%rsi), %xmm0
2616 lddqu (%rsi), %xmm1
2617 movdqu %xmm0, 7(%rdi)
2618 movdqu %xmm1, (%rdi)
2619 ret
2620
2621 .p2align 4
/* Copy exactly 7 bytes with two overlapping 4-byte moves (bytes 3..6
   and 0..3).  Both loads complete before the first store.  Entry 7 of
   L(table_144_bytes_bwd).  */
2622 L(bwd_write_7bytes):
2623 mov 3(%rsi), %edx
2624 mov (%rsi), %ecx
2625 mov %edx, 3(%rdi)
2626 mov %ecx, (%rdi)
2627 ret
2628
2629 .p2align 4
/* Tail copy for remaining lengths 134, 118, ..., 38, 22 (== 6 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2630 L(bwd_write_134bytes):
2631 lddqu 118(%rsi), %xmm0
2632 movdqu %xmm0, 118(%rdi)
2633 L(bwd_write_118bytes):
2634 lddqu 102(%rsi), %xmm0
2635 movdqu %xmm0, 102(%rdi)
2636 L(bwd_write_102bytes):
2637 lddqu 86(%rsi), %xmm0
2638 movdqu %xmm0, 86(%rdi)
2639 L(bwd_write_86bytes):
2640 lddqu 70(%rsi), %xmm0
2641 movdqu %xmm0, 70(%rdi)
2642 L(bwd_write_70bytes):
2643 lddqu 54(%rsi), %xmm0
2644 movdqu %xmm0, 54(%rdi)
2645 L(bwd_write_54bytes):
2646 lddqu 38(%rsi), %xmm0
2647 movdqu %xmm0, 38(%rdi)
2648 L(bwd_write_38bytes):
2649 lddqu 22(%rsi), %xmm0
2650 movdqu %xmm0, 22(%rdi)
2651 L(bwd_write_22bytes):
/* The last two chunks overlap (bytes 6..21 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2652 lddqu 6(%rsi), %xmm0
2653 lddqu (%rsi), %xmm1
2654 movdqu %xmm0, 6(%rdi)
2655 movdqu %xmm1, (%rdi)
2656 ret
2657
2658 .p2align 4
/* Copy exactly 6 bytes with two overlapping 4-byte moves (bytes 2..5
   and 0..3).  Both loads complete before the first store.  Entry 6 of
   L(table_144_bytes_bwd).  */
2659 L(bwd_write_6bytes):
2660 mov 2(%rsi), %edx
2661 mov (%rsi), %ecx
2662 mov %edx, 2(%rdi)
2663 mov %ecx, (%rdi)
2664 ret
2665
2666 .p2align 4
/* Tail copy for remaining lengths 133, 117, ..., 37, 21 (== 5 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2667 L(bwd_write_133bytes):
2668 lddqu 117(%rsi), %xmm0
2669 movdqu %xmm0, 117(%rdi)
2670 L(bwd_write_117bytes):
2671 lddqu 101(%rsi), %xmm0
2672 movdqu %xmm0, 101(%rdi)
2673 L(bwd_write_101bytes):
2674 lddqu 85(%rsi), %xmm0
2675 movdqu %xmm0, 85(%rdi)
2676 L(bwd_write_85bytes):
2677 lddqu 69(%rsi), %xmm0
2678 movdqu %xmm0, 69(%rdi)
2679 L(bwd_write_69bytes):
2680 lddqu 53(%rsi), %xmm0
2681 movdqu %xmm0, 53(%rdi)
2682 L(bwd_write_53bytes):
2683 lddqu 37(%rsi), %xmm0
2684 movdqu %xmm0, 37(%rdi)
2685 L(bwd_write_37bytes):
2686 lddqu 21(%rsi), %xmm0
2687 movdqu %xmm0, 21(%rdi)
2688 L(bwd_write_21bytes):
/* The last two chunks overlap (bytes 5..20 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2689 lddqu 5(%rsi), %xmm0
2690 lddqu (%rsi), %xmm1
2691 movdqu %xmm0, 5(%rdi)
2692 movdqu %xmm1, (%rdi)
2693 ret
2694
2695 .p2align 4
/* Copy exactly 5 bytes with two overlapping 4-byte moves (bytes 1..4
   and 0..3).  Both loads complete before the first store.  Entry 5 of
   L(table_144_bytes_bwd).  */
2696 L(bwd_write_5bytes):
2697 mov 1(%rsi), %edx
2698 mov (%rsi), %ecx
2699 mov %edx, 1(%rdi)
2700 mov %ecx, (%rdi)
2701 ret
2702
2703 .p2align 4
/* Tail copy for remaining lengths 132, 116, ..., 36, 20 (== 4 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2704 L(bwd_write_132bytes):
2705 lddqu 116(%rsi), %xmm0
2706 movdqu %xmm0, 116(%rdi)
2707 L(bwd_write_116bytes):
2708 lddqu 100(%rsi), %xmm0
2709 movdqu %xmm0, 100(%rdi)
2710 L(bwd_write_100bytes):
2711 lddqu 84(%rsi), %xmm0
2712 movdqu %xmm0, 84(%rdi)
2713 L(bwd_write_84bytes):
2714 lddqu 68(%rsi), %xmm0
2715 movdqu %xmm0, 68(%rdi)
2716 L(bwd_write_68bytes):
2717 lddqu 52(%rsi), %xmm0
2718 movdqu %xmm0, 52(%rdi)
2719 L(bwd_write_52bytes):
2720 lddqu 36(%rsi), %xmm0
2721 movdqu %xmm0, 36(%rdi)
2722 L(bwd_write_36bytes):
2723 lddqu 20(%rsi), %xmm0
2724 movdqu %xmm0, 20(%rdi)
2725 L(bwd_write_20bytes):
/* The last two chunks overlap (bytes 4..19 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2726 lddqu 4(%rsi), %xmm0
2727 lddqu (%rsi), %xmm1
2728 movdqu %xmm0, 4(%rdi)
2729 movdqu %xmm1, (%rdi)
2730 ret
2731
2732 .p2align 4
/* Copy exactly 4 bytes with a single 32-bit load/store pair through
   %edx.  Entry 4 of L(table_144_bytes_bwd).  */
2733 L(bwd_write_4bytes):
2734 mov (%rsi), %edx
2735 mov %edx, (%rdi)
2736 ret
2737
2738 .p2align 4
/* Tail copy for remaining lengths 131, 115, ..., 35, 19 (== 3 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2739 L(bwd_write_131bytes):
2740 lddqu 115(%rsi), %xmm0
2741 movdqu %xmm0, 115(%rdi)
2742 L(bwd_write_115bytes):
2743 lddqu 99(%rsi), %xmm0
2744 movdqu %xmm0, 99(%rdi)
2745 L(bwd_write_99bytes):
2746 lddqu 83(%rsi), %xmm0
2747 movdqu %xmm0, 83(%rdi)
2748 L(bwd_write_83bytes):
2749 lddqu 67(%rsi), %xmm0
2750 movdqu %xmm0, 67(%rdi)
2751 L(bwd_write_67bytes):
2752 lddqu 51(%rsi), %xmm0
2753 movdqu %xmm0, 51(%rdi)
2754 L(bwd_write_51bytes):
2755 lddqu 35(%rsi), %xmm0
2756 movdqu %xmm0, 35(%rdi)
2757 L(bwd_write_35bytes):
2758 lddqu 19(%rsi), %xmm0
2759 movdqu %xmm0, 19(%rdi)
2760 L(bwd_write_19bytes):
/* The last two chunks overlap (bytes 3..18 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2761 lddqu 3(%rsi), %xmm0
2762 lddqu (%rsi), %xmm1
2763 movdqu %xmm0, 3(%rdi)
2764 movdqu %xmm1, (%rdi)
2765 ret
2766
2767 .p2align 4
/* Copy exactly 3 bytes with two overlapping 2-byte moves (bytes 1..2
   and 0..1).  Both loads complete before the first store.  Entry 3 of
   L(table_144_bytes_bwd).  */
2768 L(bwd_write_3bytes):
2769 mov 1(%rsi), %dx
2770 mov (%rsi), %cx
2771 mov %dx, 1(%rdi)
2772 mov %cx, (%rdi)
2773 ret
2774
2775 .p2align 4
/* Tail copy for remaining lengths 130, 114, ..., 34, 18 (== 2 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2776 L(bwd_write_130bytes):
2777 lddqu 114(%rsi), %xmm0
2778 movdqu %xmm0, 114(%rdi)
2779 L(bwd_write_114bytes):
2780 lddqu 98(%rsi), %xmm0
2781 movdqu %xmm0, 98(%rdi)
2782 L(bwd_write_98bytes):
2783 lddqu 82(%rsi), %xmm0
2784 movdqu %xmm0, 82(%rdi)
2785 L(bwd_write_82bytes):
2786 lddqu 66(%rsi), %xmm0
2787 movdqu %xmm0, 66(%rdi)
2788 L(bwd_write_66bytes):
2789 lddqu 50(%rsi), %xmm0
2790 movdqu %xmm0, 50(%rdi)
2791 L(bwd_write_50bytes):
2792 lddqu 34(%rsi), %xmm0
2793 movdqu %xmm0, 34(%rdi)
2794 L(bwd_write_34bytes):
2795 lddqu 18(%rsi), %xmm0
2796 movdqu %xmm0, 18(%rdi)
2797 L(bwd_write_18bytes):
/* The last two chunks overlap (bytes 2..17 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2798 lddqu 2(%rsi), %xmm0
2799 lddqu (%rsi), %xmm1
2800 movdqu %xmm0, 2(%rdi)
2801 movdqu %xmm1, (%rdi)
2802 ret
2803
2804 .p2align 4
/* Copy exactly 2 bytes.  The 16-bit source value is loaded
   zero-extended into %edx and its low half (%dx) is stored.  Entry 2
   of L(table_144_bytes_bwd).  */
2805 L(bwd_write_2bytes):
2806 movzwl (%rsi), %edx
2807 mov %dx, (%rdi)
2808 ret
2809
2810 .p2align 4
/* Tail copy for remaining lengths 129, 113, ..., 33, 17 (== 1 mod 16).
   %rsi/%rdi address the first byte of the source/destination tail.
   Entering at L(bwd_write_Nbytes) copies exactly N bytes: each step
   moves one unaligned 16-byte chunk at the highest offset not yet
   copied, then falls through to the next-lower label.  Every label in
   this chain is an entry of L(table_144_bytes_bwd).  */
2811 L(bwd_write_129bytes):
2812 lddqu 113(%rsi), %xmm0
2813 movdqu %xmm0, 113(%rdi)
2814 L(bwd_write_113bytes):
2815 lddqu 97(%rsi), %xmm0
2816 movdqu %xmm0, 97(%rdi)
2817 L(bwd_write_97bytes):
2818 lddqu 81(%rsi), %xmm0
2819 movdqu %xmm0, 81(%rdi)
2820 L(bwd_write_81bytes):
2821 lddqu 65(%rsi), %xmm0
2822 movdqu %xmm0, 65(%rdi)
2823 L(bwd_write_65bytes):
2824 lddqu 49(%rsi), %xmm0
2825 movdqu %xmm0, 49(%rdi)
2826 L(bwd_write_49bytes):
2827 lddqu 33(%rsi), %xmm0
2828 movdqu %xmm0, 33(%rdi)
2829 L(bwd_write_33bytes):
2830 lddqu 17(%rsi), %xmm0
2831 movdqu %xmm0, 17(%rdi)
2832 L(bwd_write_17bytes):
/* The last two chunks overlap (bytes 1..16 and 0..15).  Both are
   loaded before either is stored, so neither load can observe the
   other chunk's store.  */
2833 lddqu 1(%rsi), %xmm0
2834 lddqu (%rsi), %xmm1
2835 movdqu %xmm0, 1(%rdi)
2836 movdqu %xmm1, (%rdi)
2837 ret
2838
2839 .p2align 4
/* Copy exactly 1 byte.  The source byte is loaded zero-extended into
   %edx and its low byte (%dl) is stored.  Entry 1 of
   L(table_144_bytes_bwd).  */
2840 L(bwd_write_1bytes):
2841 movzbl (%rsi), %edx
2842 mov %dl, (%rdi)
2843 ret
2844
2845 END (MEMCPY)
2846
/* The jump tables below are emitted into an allocatable ("a"),
   non-executable data section of their own.  */
2847 .section .rodata.ssse3,"a",@progbits
2848 .p2align 3
/* Jump table for the backward tail copy.  Entry N (N = 0..143) is a
   32-bit offset, relative to the start of this table (JMPTBL (I, B)
   expands to I - B), of L(bwd_write_Nbytes), the routine that copies
   exactly N remaining bytes.  The table is 8-byte aligned and its
   format matches what BRANCH_TO_JMPTBL_ENTRY consumes: a sign-extended
   entry added back to the table address.  */
2849 L(table_144_bytes_bwd):
2850 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2851 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2852 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2853 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2854 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2855 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2982 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2983 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2984 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2985 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2986 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2987 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2988 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2989 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2990 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2991 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2992 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2993 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2994
2995 .p2align 3
/* Jump table for the forward tail copy.  Entry N (N = 0..143) is a
   32-bit offset, relative to the start of this table (JMPTBL (I, B)
   expands to I - B), of L(fwd_write_Nbytes).  Those routines are
   defined earlier in the file; by analogy with the backward table each
   presumably copies exactly N remaining bytes -- confirm against their
   definitions.  The table is 8-byte aligned.  */
2996 L(table_144_bytes_fwd):
2997 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
2998 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
2999 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
3000 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
3001 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
3002 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3129 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3130 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3131 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3132 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3133 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3134 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3135 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3136 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3137 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3138 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3139 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3140 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3141
3142 .p2align 3
/* Jump table for the forward L(shl_N) routines, N = 0..15.  Each entry
   is a 32-bit offset of L(shl_N) relative to the start of this table
   (JMPTBL (I, B) expands to I - B).  The routines are defined earlier
   in the file.  NOTE(review): the index is presumably a 0..15 byte
   misalignment between source and destination -- confirm at the
   dispatch site.  */
3143 L(shl_table_fwd):
3144 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3145 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3146 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3147 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3148 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3149 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3150 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3151 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3152 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3153 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3154 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3155 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3156 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3157 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3158 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3159 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3160
3161 .p2align 3
/* Jump table for the backward L(shl_N_bwd) routines, N = 0..15.  Each
   entry is a 32-bit offset of L(shl_N_bwd) relative to the start of
   this table (JMPTBL (I, B) expands to I - B).  The routines are
   defined earlier in the file.  NOTE(review): the index is presumably
   a 0..15 byte misalignment between source and destination -- confirm
   at the dispatch site.  */
3162 L(shl_table_bwd):
3163 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3164 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3165 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3166 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3167 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3168 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3169 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3170 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3171 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3172 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3173 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3174 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3175 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3176 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3177 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3178 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3179
3180 #endif