]> git.ipfire.org Git - thirdparty/glibc.git/blame - sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
correct LC_TELEPHONE for pap locales
[thirdparty/glibc.git] / sysdeps / x86_64 / multiarch / memcpy-ssse3-back.S
CommitLineData
6fb8cbcb 1/* memcpy with SSSE3 and REP string
568035b7 2 Copyright (C) 2010-2013 Free Software Foundation, Inc.
6fb8cbcb
L
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
59ba27a6
PE
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
6fb8cbcb
L
19
20#include <sysdep.h>
21
22#if !defined NOT_IN_libc \
23 && (defined SHARED \
24 || defined USE_AS_MEMMOVE \
25 || !defined USE_MULTIARCH)
26
27#include "asm-syntax.h"
28
29#ifndef MEMCPY
30# define MEMCPY __memcpy_ssse3_back
31# define MEMCPY_CHK __memcpy_chk_ssse3_back
32#endif
33
34#ifndef ALIGN
35# define ALIGN(n) .p2align n
36#endif
37
38#define JMPTBL(I, B) I - B
39
40/* Branch to an entry in a jump table. TABLE is a jump table with
41 relative offsets. INDEX is a register that contains the index into
42 the jump table. SCALE is the scale of INDEX.
 The table entries are 32-bit offsets relative to TABLE itself (they
 are sign-extended by movslq and added to the table address).
 Clobbers: %r11 (table address) and INDEX (overwritten with the branch
 target). The ud2 after the unconditional indirect jmp is never
 reached by fall-through. */
43#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
44 lea TABLE(%rip), %r11; \
45 movslq (%r11, INDEX, SCALE), INDEX; \
46 lea (%r11, INDEX), INDEX; \
47 jmp *INDEX; \
48 ud2
49
50 .section .text.ssse3,"ax",@progbits
4c559bcd 51#if !defined USE_AS_BCOPY
6fb8cbcb
L
/* Fortified entry point.
   In:  %rdx = number of bytes to copy, %rcx = size limit to check it
	against (the destination object size in the __memcpy_chk
	interface).
   If the limit is smaller than the length, branch to __chk_fail.
   There is no return instruction here: when the check passes, control
   continues with whatever is assembled right after END (MEMCPY_CHK),
   which in this file is ENTRY (MEMCPY).  */
ENTRY (MEMCPY_CHK)
#ifdef __ILP32__
	/* x32: size_t is 32 bits wide and the upper halves of %rdx and
	   %rcx are not guaranteed to be zero, so only the low 32 bits may
	   take part in the comparison.  */
	cmpl	%edx, %ecx
#else
	cmpq	%rdx, %rcx
#endif
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
56#endif
57
/* Entry point and dispatch for copies shorter than 144 bytes.
   In:  %rdi = dst, %rsi = src, %rdx = n.
   Out: %rax = dst (dst + n when USE_AS_MEMPCPY is defined).
   Copies of 144 bytes or more go to L(144bytesormore) (or, for memmove
   with dst above src, to L(copy_backward)); shorter ones are finished
   directly through the 144-entry jump tables indexed by n.  */
ENTRY (MEMCPY)
	mov	%rdi, %rax
#ifdef __ILP32__
	/* x32: the length arrives as a 32-bit value and the upper 32 bits
	   of %rdx are undefined.  Everything below uses %rdx as a 64-bit
	   count and jump-table index, so zero-extend it first.  */
	mov	%edx, %edx
#endif
#ifdef USE_AS_MEMPCPY
	add	%rdx, %rax
#endif

#ifdef USE_AS_MEMMOVE
	/* Overlap-safe direction choice: dst below src copies forward,
	   dst == src has nothing to do, dst above src copies backward.  */
	cmp	%rsi, %rdi
	jb	L(copy_forward)
	je	L(bwd_write_0bytes)
	cmp	$144, %rdx
	jae	L(copy_backward)
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
	cmp	$144, %rdx
	jae	L(144bytesormore)

	/* Despite the label name, this path handles every n below 144.  */
L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
	/* Plain memcpy may copy in either direction; pick the backward
	   table when the low byte of src is <= the low byte of dst
	   (unsigned).  */
	cmp	%dil, %sil
	jbe	L(bk_write)
#endif
	/* The forward table is entered with both pointers at the end of
	   the buffers.  */
	add	%rdx, %rsi
	add	%rdx, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
#ifndef USE_AS_MEMMOVE
L(bk_write):

	/* The backward table is entered with both pointers at the start
	   of the buffers.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
89
/* Forward set-up for n >= 144.  Saves the first 16 source bytes in
   %xmm0 and the original dst in %r8 (the copy loops store them with
   movdqu %xmm0, (%r8) when they finish), rounds dst up to the next
   16-byte boundary and then selects a loop by the source misalignment.  */
90 ALIGN (4)
91L(144bytesormore):
92
93#ifndef USE_AS_MEMMOVE
 /* NOTE(review): this is a signed comparison (jle) of the low address
    bytes, while the path for n < 144 uses the unsigned jbe for the same
    test -- confirm that the difference is intended.  */
94 cmp %dil, %sil
95 jle L(copy_backward)
96#endif
97 movdqu (%rsi), %xmm0
98 mov %rdi, %r8
 /* %rdi = next 16-byte boundary strictly above the original dst;
    %r9 = number of bytes skipped (1..16).  */
99 and $-16, %rdi
100 add $16, %rdi
101 mov %rdi, %r9
102 sub %r8, %r9
 /* Account for the skipped bytes in the count and the source pointer.  */
103 sub %r9, %rdx
104 add %r9, %rsi
 /* %r9 = source offset within its 16-byte block; 0 means the source is
    aligned as well.  */
105 mov %rsi, %r9
106 and $0xf, %r9
107 jz L(shl_0)
108#ifdef DATA_CACHE_SIZE
6d2850e7 109 mov $DATA_CACHE_SIZE, %RCX_LP
6fb8cbcb 110#else
afec409a 111 mov __x86_data_cache_size(%rip), %RCX_LP
6fb8cbcb
L
112#endif
 /* Copies at least as large as the data cache size go to
    L(gobble_mem_fwd).  */
113 cmp %rcx, %rdx
114 jae L(gobble_mem_fwd)
 /* Otherwise jump through L(shl_table_fwd), indexed by the source
    misalignment, with the count already reduced by 0x80.  */
115 lea L(shl_table_fwd)(%rip), %r11
116 sub $0x80, %rdx
117 movslq (%r11, %r9, 4), %r9
118 add %r11, %r9
119 jmp *%r9
120 ud2
121
/* Backward set-up.  Saves the last 16 source bytes in %xmm0 and the
   address of the last 16 destination bytes in %r8, rounds the
   destination end down to a 16-byte boundary and then selects a loop by
   the misalignment of the source end.  */
122 ALIGN (4)
123L(copy_backward):
124#ifdef DATA_CACHE_SIZE
6d2850e7 125 mov $DATA_CACHE_SIZE, %RCX_LP
6fb8cbcb 126#else
afec409a 127 mov __x86_data_cache_size(%rip), %RCX_LP
6fb8cbcb
L
128#endif
 /* Copies larger than twice the data cache size go to
    L(gobble_mem_bwd).  */
129 shl $1, %rcx
130 cmp %rcx, %rdx
131 ja L(gobble_mem_bwd)
132
 /* Point both registers one past the end of the buffers.  */
133 add %rdx, %rdi
134 add %rdx, %rsi
135 movdqu -16(%rsi), %xmm0
136 lea -16(%rdi), %r8
 /* %r9 = low four bits of the destination end; the xor clears exactly
    those bits, and the source pointer and count shrink by the same
    amount.  */
137 mov %rdi, %r9
138 and $0xf, %r9
139 xor %r9, %rdi
140 sub %r9, %rsi
141 sub %r9, %rdx
 /* %r9 = offset of the source end within its 16-byte block.  */
142 mov %rsi, %r9
143 and $0xf, %r9
144 jz L(shl_0_bwd)
 /* Jump through L(shl_table_bwd), indexed by that offset, with the
    count already reduced by 0x80.  */
145 lea L(shl_table_bwd)(%rip), %r11
146 sub $0x80, %rdx
147 movslq (%r11, %r9, 4), %r9
148 add %r11, %r9
149 jmp *%r9
150 ud2
151
/* Forward copy with source and destination both 16-byte aligned.  */
152 ALIGN (4)
153L(shl_0):
154
 /* %r9 = n + n / 256; when that reaches half the data cache size the
    copy is handed to L(gobble_mem_fwd) instead.  */
155 mov %rdx, %r9
156 shr $8, %r9
157 add %rdx, %r9
158#ifdef DATA_CACHE_SIZE
6d2850e7 159 cmp $DATA_CACHE_SIZE_HALF, %R9_LP
6fb8cbcb 160#else
afec409a 161 cmp __x86_data_cache_size_half(%rip), %R9_LP
6fb8cbcb
L
162#endif
163 jae L(gobble_mem_fwd)
164 sub $0x80, %rdx
165 ALIGN (4)
 /* Copies 128 bytes per iteration with aligned loads and stores.  The
    jae tests CF from the sub below; the two lea instructions in
    between do not modify the flags.  */
166L(shl_0_loop):
167 movdqa (%rsi), %xmm1
168 movdqa %xmm1, (%rdi)
169 movaps 0x10(%rsi), %xmm2
170 movaps %xmm2, 0x10(%rdi)
171 movaps 0x20(%rsi), %xmm3
172 movaps %xmm3, 0x20(%rdi)
173 movaps 0x30(%rsi), %xmm4
174 movaps %xmm4, 0x30(%rdi)
175 movaps 0x40(%rsi), %xmm1
176 movaps %xmm1, 0x40(%rdi)
177 movaps 0x50(%rsi), %xmm2
178 movaps %xmm2, 0x50(%rdi)
179 movaps 0x60(%rsi), %xmm3
180 movaps %xmm3, 0x60(%rdi)
181 movaps 0x70(%rsi), %xmm4
182 movaps %xmm4, 0x70(%rdi)
183 sub $0x80, %rdx
184 lea 0x80(%rsi), %rsi
185 lea 0x80(%rdi), %rdi
186 jae L(shl_0_loop)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
187 movdqu %xmm0, (%r8)
188 add $0x80, %rdx
189 add %rdx, %rsi
190 add %rdx, %rdi
191 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
192
/* Backward copy with source end and destination end both 16-byte
   aligned.  Each iteration copies the 128 bytes below %rsi to the 128
   bytes below %rdi and then steps both pointers down by 0x80.  The jae
   tests CF from the sub; the two lea instructions in between do not
   modify the flags.  */
193 ALIGN (4)
194L(shl_0_bwd):
195 sub $0x80, %rdx
196L(copy_backward_loop):
197 movaps -0x10(%rsi), %xmm1
198 movaps %xmm1, -0x10(%rdi)
199 movaps -0x20(%rsi), %xmm2
200 movaps %xmm2, -0x20(%rdi)
201 movaps -0x30(%rsi), %xmm3
202 movaps %xmm3, -0x30(%rdi)
203 movaps -0x40(%rsi), %xmm4
204 movaps %xmm4, -0x40(%rdi)
205 movaps -0x50(%rsi), %xmm5
206 movaps %xmm5, -0x50(%rdi)
207 movaps -0x60(%rsi), %xmm5
208 movaps %xmm5, -0x60(%rdi)
209 movaps -0x70(%rsi), %xmm5
210 movaps %xmm5, -0x70(%rdi)
211 movaps -0x80(%rsi), %xmm5
212 movaps %xmm5, -0x80(%rdi)
213 sub $0x80, %rdx
214 lea -0x80(%rdi), %rdi
215 lea -0x80(%rsi), %rsi
216 jae L(copy_backward_loop)
217
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
218 movdqu %xmm0, (%r8)
219 add $0x80, %rdx
220 sub %rdx, %rdi
221 sub %rdx, %rsi
222 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
223
/* L(shl_1): forward 128-byte-per-iteration loop for a source that is
   1 byte past a 16-byte boundary (the movaps loads at -0x01(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $1
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
224 ALIGN (4)
225L(shl_1):
226 sub $0x80, %rdx
227 movaps -0x01(%rsi), %xmm1
228 movaps 0x0f(%rsi), %xmm2
229 movaps 0x1f(%rsi), %xmm3
230 movaps 0x2f(%rsi), %xmm4
231 movaps 0x3f(%rsi), %xmm5
232 movaps 0x4f(%rsi), %xmm6
233 movaps 0x5f(%rsi), %xmm7
234 movaps 0x6f(%rsi), %xmm8
235 movaps 0x7f(%rsi), %xmm9
236 lea 0x80(%rsi), %rsi
237 palignr $1, %xmm8, %xmm9
238 movaps %xmm9, 0x70(%rdi)
239 palignr $1, %xmm7, %xmm8
240 movaps %xmm8, 0x60(%rdi)
241 palignr $1, %xmm6, %xmm7
242 movaps %xmm7, 0x50(%rdi)
243 palignr $1, %xmm5, %xmm6
244 movaps %xmm6, 0x40(%rdi)
245 palignr $1, %xmm4, %xmm5
246 movaps %xmm5, 0x30(%rdi)
247 palignr $1, %xmm3, %xmm4
248 movaps %xmm4, 0x20(%rdi)
249 palignr $1, %xmm2, %xmm3
250 movaps %xmm3, 0x10(%rdi)
251 palignr $1, %xmm1, %xmm2
252 movaps %xmm2, (%rdi)
253 lea 0x80(%rdi), %rdi
254 jae L(shl_1)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
255 movdqu %xmm0, (%r8)
256 add $0x80, %rdx
257 add %rdx, %rdi
258 add %rdx, %rsi
259 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
260
/* L(shl_1_bwd): backward 128-byte-per-iteration loop for a source end
   that is 1 byte past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $1 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
261 ALIGN (4)
262L(shl_1_bwd):
263 movaps -0x01(%rsi), %xmm1
264
265 movaps -0x11(%rsi), %xmm2
266 palignr $1, %xmm2, %xmm1
267 movaps %xmm1, -0x10(%rdi)
268
269 movaps -0x21(%rsi), %xmm3
270 palignr $1, %xmm3, %xmm2
271 movaps %xmm2, -0x20(%rdi)
272
273 movaps -0x31(%rsi), %xmm4
274 palignr $1, %xmm4, %xmm3
275 movaps %xmm3, -0x30(%rdi)
276
277 movaps -0x41(%rsi), %xmm5
278 palignr $1, %xmm5, %xmm4
279 movaps %xmm4, -0x40(%rdi)
280
281 movaps -0x51(%rsi), %xmm6
282 palignr $1, %xmm6, %xmm5
283 movaps %xmm5, -0x50(%rdi)
284
285 movaps -0x61(%rsi), %xmm7
286 palignr $1, %xmm7, %xmm6
287 movaps %xmm6, -0x60(%rdi)
288
289 movaps -0x71(%rsi), %xmm8
290 palignr $1, %xmm8, %xmm7
291 movaps %xmm7, -0x70(%rdi)
292
293 movaps -0x81(%rsi), %xmm9
294 palignr $1, %xmm9, %xmm8
295 movaps %xmm8, -0x80(%rdi)
296
297 sub $0x80, %rdx
298 lea -0x80(%rdi), %rdi
299 lea -0x80(%rsi), %rsi
300 jae L(shl_1_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
301 movdqu %xmm0, (%r8)
302 add $0x80, %rdx
303 sub %rdx, %rdi
304 sub %rdx, %rsi
305 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
306
/* L(shl_2): forward 128-byte-per-iteration loop for a source that is
   2 bytes past a 16-byte boundary (the movaps loads at -0x02(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $2
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
307 ALIGN (4)
308L(shl_2):
309 sub $0x80, %rdx
310 movaps -0x02(%rsi), %xmm1
311 movaps 0x0e(%rsi), %xmm2
312 movaps 0x1e(%rsi), %xmm3
313 movaps 0x2e(%rsi), %xmm4
314 movaps 0x3e(%rsi), %xmm5
315 movaps 0x4e(%rsi), %xmm6
316 movaps 0x5e(%rsi), %xmm7
317 movaps 0x6e(%rsi), %xmm8
318 movaps 0x7e(%rsi), %xmm9
319 lea 0x80(%rsi), %rsi
320 palignr $2, %xmm8, %xmm9
321 movaps %xmm9, 0x70(%rdi)
322 palignr $2, %xmm7, %xmm8
323 movaps %xmm8, 0x60(%rdi)
324 palignr $2, %xmm6, %xmm7
325 movaps %xmm7, 0x50(%rdi)
326 palignr $2, %xmm5, %xmm6
327 movaps %xmm6, 0x40(%rdi)
328 palignr $2, %xmm4, %xmm5
329 movaps %xmm5, 0x30(%rdi)
330 palignr $2, %xmm3, %xmm4
331 movaps %xmm4, 0x20(%rdi)
332 palignr $2, %xmm2, %xmm3
333 movaps %xmm3, 0x10(%rdi)
334 palignr $2, %xmm1, %xmm2
335 movaps %xmm2, (%rdi)
336 lea 0x80(%rdi), %rdi
337 jae L(shl_2)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
338 movdqu %xmm0, (%r8)
339 add $0x80, %rdx
340 add %rdx, %rdi
341 add %rdx, %rsi
342 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
343
/* L(shl_2_bwd): backward 128-byte-per-iteration loop for a source end
   that is 2 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $2 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
344 ALIGN (4)
345L(shl_2_bwd):
346 movaps -0x02(%rsi), %xmm1
347
348 movaps -0x12(%rsi), %xmm2
349 palignr $2, %xmm2, %xmm1
350 movaps %xmm1, -0x10(%rdi)
351
352 movaps -0x22(%rsi), %xmm3
353 palignr $2, %xmm3, %xmm2
354 movaps %xmm2, -0x20(%rdi)
355
356 movaps -0x32(%rsi), %xmm4
357 palignr $2, %xmm4, %xmm3
358 movaps %xmm3, -0x30(%rdi)
359
360 movaps -0x42(%rsi), %xmm5
361 palignr $2, %xmm5, %xmm4
362 movaps %xmm4, -0x40(%rdi)
363
364 movaps -0x52(%rsi), %xmm6
365 palignr $2, %xmm6, %xmm5
366 movaps %xmm5, -0x50(%rdi)
367
368 movaps -0x62(%rsi), %xmm7
369 palignr $2, %xmm7, %xmm6
370 movaps %xmm6, -0x60(%rdi)
371
372 movaps -0x72(%rsi), %xmm8
373 palignr $2, %xmm8, %xmm7
374 movaps %xmm7, -0x70(%rdi)
375
376 movaps -0x82(%rsi), %xmm9
377 palignr $2, %xmm9, %xmm8
378 movaps %xmm8, -0x80(%rdi)
379
380 sub $0x80, %rdx
381 lea -0x80(%rdi), %rdi
382 lea -0x80(%rsi), %rsi
383 jae L(shl_2_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
384 movdqu %xmm0, (%r8)
385 add $0x80, %rdx
386 sub %rdx, %rdi
387 sub %rdx, %rsi
388 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
389
/* L(shl_3): forward 128-byte-per-iteration loop for a source that is
   3 bytes past a 16-byte boundary (the movaps loads at -0x03(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $3
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
390 ALIGN (4)
391L(shl_3):
392 sub $0x80, %rdx
393 movaps -0x03(%rsi), %xmm1
394 movaps 0x0d(%rsi), %xmm2
395 movaps 0x1d(%rsi), %xmm3
396 movaps 0x2d(%rsi), %xmm4
397 movaps 0x3d(%rsi), %xmm5
398 movaps 0x4d(%rsi), %xmm6
399 movaps 0x5d(%rsi), %xmm7
400 movaps 0x6d(%rsi), %xmm8
401 movaps 0x7d(%rsi), %xmm9
402 lea 0x80(%rsi), %rsi
403 palignr $3, %xmm8, %xmm9
404 movaps %xmm9, 0x70(%rdi)
405 palignr $3, %xmm7, %xmm8
406 movaps %xmm8, 0x60(%rdi)
407 palignr $3, %xmm6, %xmm7
408 movaps %xmm7, 0x50(%rdi)
409 palignr $3, %xmm5, %xmm6
410 movaps %xmm6, 0x40(%rdi)
411 palignr $3, %xmm4, %xmm5
412 movaps %xmm5, 0x30(%rdi)
413 palignr $3, %xmm3, %xmm4
414 movaps %xmm4, 0x20(%rdi)
415 palignr $3, %xmm2, %xmm3
416 movaps %xmm3, 0x10(%rdi)
417 palignr $3, %xmm1, %xmm2
418 movaps %xmm2, (%rdi)
419 lea 0x80(%rdi), %rdi
420 jae L(shl_3)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
421 movdqu %xmm0, (%r8)
422 add $0x80, %rdx
423 add %rdx, %rdi
424 add %rdx, %rsi
425 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
426
/* L(shl_3_bwd): backward 128-byte-per-iteration loop for a source end
   that is 3 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $3 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
427 ALIGN (4)
428L(shl_3_bwd):
429 movaps -0x03(%rsi), %xmm1
430
431 movaps -0x13(%rsi), %xmm2
432 palignr $3, %xmm2, %xmm1
433 movaps %xmm1, -0x10(%rdi)
434
435 movaps -0x23(%rsi), %xmm3
436 palignr $3, %xmm3, %xmm2
437 movaps %xmm2, -0x20(%rdi)
438
439 movaps -0x33(%rsi), %xmm4
440 palignr $3, %xmm4, %xmm3
441 movaps %xmm3, -0x30(%rdi)
442
443 movaps -0x43(%rsi), %xmm5
444 palignr $3, %xmm5, %xmm4
445 movaps %xmm4, -0x40(%rdi)
446
447 movaps -0x53(%rsi), %xmm6
448 palignr $3, %xmm6, %xmm5
449 movaps %xmm5, -0x50(%rdi)
450
451 movaps -0x63(%rsi), %xmm7
452 palignr $3, %xmm7, %xmm6
453 movaps %xmm6, -0x60(%rdi)
454
455 movaps -0x73(%rsi), %xmm8
456 palignr $3, %xmm8, %xmm7
457 movaps %xmm7, -0x70(%rdi)
458
459 movaps -0x83(%rsi), %xmm9
460 palignr $3, %xmm9, %xmm8
461 movaps %xmm8, -0x80(%rdi)
462
463 sub $0x80, %rdx
464 lea -0x80(%rdi), %rdi
465 lea -0x80(%rsi), %rsi
466 jae L(shl_3_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
467 movdqu %xmm0, (%r8)
468 add $0x80, %rdx
469 sub %rdx, %rdi
470 sub %rdx, %rsi
471 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
472
/* L(shl_4): forward 128-byte-per-iteration loop for a source that is
   4 bytes past a 16-byte boundary (the movaps loads at -0x04(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $4
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
473 ALIGN (4)
474L(shl_4):
475 sub $0x80, %rdx
476 movaps -0x04(%rsi), %xmm1
477 movaps 0x0c(%rsi), %xmm2
478 movaps 0x1c(%rsi), %xmm3
479 movaps 0x2c(%rsi), %xmm4
480 movaps 0x3c(%rsi), %xmm5
481 movaps 0x4c(%rsi), %xmm6
482 movaps 0x5c(%rsi), %xmm7
483 movaps 0x6c(%rsi), %xmm8
484 movaps 0x7c(%rsi), %xmm9
485 lea 0x80(%rsi), %rsi
486 palignr $4, %xmm8, %xmm9
487 movaps %xmm9, 0x70(%rdi)
488 palignr $4, %xmm7, %xmm8
489 movaps %xmm8, 0x60(%rdi)
490 palignr $4, %xmm6, %xmm7
491 movaps %xmm7, 0x50(%rdi)
492 palignr $4, %xmm5, %xmm6
493 movaps %xmm6, 0x40(%rdi)
494 palignr $4, %xmm4, %xmm5
495 movaps %xmm5, 0x30(%rdi)
496 palignr $4, %xmm3, %xmm4
497 movaps %xmm4, 0x20(%rdi)
498 palignr $4, %xmm2, %xmm3
499 movaps %xmm3, 0x10(%rdi)
500 palignr $4, %xmm1, %xmm2
501 movaps %xmm2, (%rdi)
502 lea 0x80(%rdi), %rdi
503 jae L(shl_4)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
504 movdqu %xmm0, (%r8)
505 add $0x80, %rdx
506 add %rdx, %rdi
507 add %rdx, %rsi
508 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
509
/* L(shl_4_bwd): backward 128-byte-per-iteration loop for a source end
   that is 4 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $4 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
510 ALIGN (4)
511L(shl_4_bwd):
512 movaps -0x04(%rsi), %xmm1
513
514 movaps -0x14(%rsi), %xmm2
515 palignr $4, %xmm2, %xmm1
516 movaps %xmm1, -0x10(%rdi)
517
518 movaps -0x24(%rsi), %xmm3
519 palignr $4, %xmm3, %xmm2
520 movaps %xmm2, -0x20(%rdi)
521
522 movaps -0x34(%rsi), %xmm4
523 palignr $4, %xmm4, %xmm3
524 movaps %xmm3, -0x30(%rdi)
525
526 movaps -0x44(%rsi), %xmm5
527 palignr $4, %xmm5, %xmm4
528 movaps %xmm4, -0x40(%rdi)
529
530 movaps -0x54(%rsi), %xmm6
531 palignr $4, %xmm6, %xmm5
532 movaps %xmm5, -0x50(%rdi)
533
534 movaps -0x64(%rsi), %xmm7
535 palignr $4, %xmm7, %xmm6
536 movaps %xmm6, -0x60(%rdi)
537
538 movaps -0x74(%rsi), %xmm8
539 palignr $4, %xmm8, %xmm7
540 movaps %xmm7, -0x70(%rdi)
541
542 movaps -0x84(%rsi), %xmm9
543 palignr $4, %xmm9, %xmm8
544 movaps %xmm8, -0x80(%rdi)
545
546 sub $0x80, %rdx
547 lea -0x80(%rdi), %rdi
548 lea -0x80(%rsi), %rsi
549 jae L(shl_4_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
550 movdqu %xmm0, (%r8)
551 add $0x80, %rdx
552 sub %rdx, %rdi
553 sub %rdx, %rsi
554 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
555
/* L(shl_5): forward 128-byte-per-iteration loop for a source that is
   5 bytes past a 16-byte boundary (the movaps loads at -0x05(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $5
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
556 ALIGN (4)
557L(shl_5):
558 sub $0x80, %rdx
559 movaps -0x05(%rsi), %xmm1
560 movaps 0x0b(%rsi), %xmm2
561 movaps 0x1b(%rsi), %xmm3
562 movaps 0x2b(%rsi), %xmm4
563 movaps 0x3b(%rsi), %xmm5
564 movaps 0x4b(%rsi), %xmm6
565 movaps 0x5b(%rsi), %xmm7
566 movaps 0x6b(%rsi), %xmm8
567 movaps 0x7b(%rsi), %xmm9
568 lea 0x80(%rsi), %rsi
569 palignr $5, %xmm8, %xmm9
570 movaps %xmm9, 0x70(%rdi)
571 palignr $5, %xmm7, %xmm8
572 movaps %xmm8, 0x60(%rdi)
573 palignr $5, %xmm6, %xmm7
574 movaps %xmm7, 0x50(%rdi)
575 palignr $5, %xmm5, %xmm6
576 movaps %xmm6, 0x40(%rdi)
577 palignr $5, %xmm4, %xmm5
578 movaps %xmm5, 0x30(%rdi)
579 palignr $5, %xmm3, %xmm4
580 movaps %xmm4, 0x20(%rdi)
581 palignr $5, %xmm2, %xmm3
582 movaps %xmm3, 0x10(%rdi)
583 palignr $5, %xmm1, %xmm2
584 movaps %xmm2, (%rdi)
585 lea 0x80(%rdi), %rdi
586 jae L(shl_5)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
587 movdqu %xmm0, (%r8)
588 add $0x80, %rdx
589 add %rdx, %rdi
590 add %rdx, %rsi
591 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
592
/* L(shl_5_bwd): backward 128-byte-per-iteration loop for a source end
   that is 5 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $5 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
593 ALIGN (4)
594L(shl_5_bwd):
595 movaps -0x05(%rsi), %xmm1
596
597 movaps -0x15(%rsi), %xmm2
598 palignr $5, %xmm2, %xmm1
599 movaps %xmm1, -0x10(%rdi)
600
601 movaps -0x25(%rsi), %xmm3
602 palignr $5, %xmm3, %xmm2
603 movaps %xmm2, -0x20(%rdi)
604
605 movaps -0x35(%rsi), %xmm4
606 palignr $5, %xmm4, %xmm3
607 movaps %xmm3, -0x30(%rdi)
608
609 movaps -0x45(%rsi), %xmm5
610 palignr $5, %xmm5, %xmm4
611 movaps %xmm4, -0x40(%rdi)
612
613 movaps -0x55(%rsi), %xmm6
614 palignr $5, %xmm6, %xmm5
615 movaps %xmm5, -0x50(%rdi)
616
617 movaps -0x65(%rsi), %xmm7
618 palignr $5, %xmm7, %xmm6
619 movaps %xmm6, -0x60(%rdi)
620
621 movaps -0x75(%rsi), %xmm8
622 palignr $5, %xmm8, %xmm7
623 movaps %xmm7, -0x70(%rdi)
624
625 movaps -0x85(%rsi), %xmm9
626 palignr $5, %xmm9, %xmm8
627 movaps %xmm8, -0x80(%rdi)
628
629 sub $0x80, %rdx
630 lea -0x80(%rdi), %rdi
631 lea -0x80(%rsi), %rsi
632 jae L(shl_5_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
633 movdqu %xmm0, (%r8)
634 add $0x80, %rdx
635 sub %rdx, %rdi
636 sub %rdx, %rsi
637 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
638
/* L(shl_6): forward 128-byte-per-iteration loop for a source that is
   6 bytes past a 16-byte boundary (the movaps loads at -0x06(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $6
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
639 ALIGN (4)
640L(shl_6):
641 sub $0x80, %rdx
642 movaps -0x06(%rsi), %xmm1
643 movaps 0x0a(%rsi), %xmm2
644 movaps 0x1a(%rsi), %xmm3
645 movaps 0x2a(%rsi), %xmm4
646 movaps 0x3a(%rsi), %xmm5
647 movaps 0x4a(%rsi), %xmm6
648 movaps 0x5a(%rsi), %xmm7
649 movaps 0x6a(%rsi), %xmm8
650 movaps 0x7a(%rsi), %xmm9
651 lea 0x80(%rsi), %rsi
652 palignr $6, %xmm8, %xmm9
653 movaps %xmm9, 0x70(%rdi)
654 palignr $6, %xmm7, %xmm8
655 movaps %xmm8, 0x60(%rdi)
656 palignr $6, %xmm6, %xmm7
657 movaps %xmm7, 0x50(%rdi)
658 palignr $6, %xmm5, %xmm6
659 movaps %xmm6, 0x40(%rdi)
660 palignr $6, %xmm4, %xmm5
661 movaps %xmm5, 0x30(%rdi)
662 palignr $6, %xmm3, %xmm4
663 movaps %xmm4, 0x20(%rdi)
664 palignr $6, %xmm2, %xmm3
665 movaps %xmm3, 0x10(%rdi)
666 palignr $6, %xmm1, %xmm2
667 movaps %xmm2, (%rdi)
668 lea 0x80(%rdi), %rdi
669 jae L(shl_6)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
670 movdqu %xmm0, (%r8)
671 add $0x80, %rdx
672 add %rdx, %rdi
673 add %rdx, %rsi
674 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
675
/* L(shl_6_bwd): backward 128-byte-per-iteration loop for a source end
   that is 6 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $6 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
676 ALIGN (4)
677L(shl_6_bwd):
678 movaps -0x06(%rsi), %xmm1
679
680 movaps -0x16(%rsi), %xmm2
681 palignr $6, %xmm2, %xmm1
682 movaps %xmm1, -0x10(%rdi)
683
684 movaps -0x26(%rsi), %xmm3
685 palignr $6, %xmm3, %xmm2
686 movaps %xmm2, -0x20(%rdi)
687
688 movaps -0x36(%rsi), %xmm4
689 palignr $6, %xmm4, %xmm3
690 movaps %xmm3, -0x30(%rdi)
691
692 movaps -0x46(%rsi), %xmm5
693 palignr $6, %xmm5, %xmm4
694 movaps %xmm4, -0x40(%rdi)
695
696 movaps -0x56(%rsi), %xmm6
697 palignr $6, %xmm6, %xmm5
698 movaps %xmm5, -0x50(%rdi)
699
700 movaps -0x66(%rsi), %xmm7
701 palignr $6, %xmm7, %xmm6
702 movaps %xmm6, -0x60(%rdi)
703
704 movaps -0x76(%rsi), %xmm8
705 palignr $6, %xmm8, %xmm7
706 movaps %xmm7, -0x70(%rdi)
707
708 movaps -0x86(%rsi), %xmm9
709 palignr $6, %xmm9, %xmm8
710 movaps %xmm8, -0x80(%rdi)
711
712 sub $0x80, %rdx
713 lea -0x80(%rdi), %rdi
714 lea -0x80(%rsi), %rsi
715 jae L(shl_6_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
716 movdqu %xmm0, (%r8)
717 add $0x80, %rdx
718 sub %rdx, %rdi
719 sub %rdx, %rsi
720 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
721
/* L(shl_7): forward 128-byte-per-iteration loop for a source that is
   7 bytes past a 16-byte boundary (the movaps loads at -0x07(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $7
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
722 ALIGN (4)
723L(shl_7):
724 sub $0x80, %rdx
725 movaps -0x07(%rsi), %xmm1
726 movaps 0x09(%rsi), %xmm2
727 movaps 0x19(%rsi), %xmm3
728 movaps 0x29(%rsi), %xmm4
729 movaps 0x39(%rsi), %xmm5
730 movaps 0x49(%rsi), %xmm6
731 movaps 0x59(%rsi), %xmm7
732 movaps 0x69(%rsi), %xmm8
733 movaps 0x79(%rsi), %xmm9
734 lea 0x80(%rsi), %rsi
735 palignr $7, %xmm8, %xmm9
736 movaps %xmm9, 0x70(%rdi)
737 palignr $7, %xmm7, %xmm8
738 movaps %xmm8, 0x60(%rdi)
739 palignr $7, %xmm6, %xmm7
740 movaps %xmm7, 0x50(%rdi)
741 palignr $7, %xmm5, %xmm6
742 movaps %xmm6, 0x40(%rdi)
743 palignr $7, %xmm4, %xmm5
744 movaps %xmm5, 0x30(%rdi)
745 palignr $7, %xmm3, %xmm4
746 movaps %xmm4, 0x20(%rdi)
747 palignr $7, %xmm2, %xmm3
748 movaps %xmm3, 0x10(%rdi)
749 palignr $7, %xmm1, %xmm2
750 movaps %xmm2, (%rdi)
751 lea 0x80(%rdi), %rdi
752 jae L(shl_7)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
753 movdqu %xmm0, (%r8)
754 add $0x80, %rdx
755 add %rdx, %rdi
756 add %rdx, %rsi
757 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
758
/* L(shl_7_bwd): backward 128-byte-per-iteration loop for a source end
   that is 7 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $7 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
759 ALIGN (4)
760L(shl_7_bwd):
761 movaps -0x07(%rsi), %xmm1
762
763 movaps -0x17(%rsi), %xmm2
764 palignr $7, %xmm2, %xmm1
765 movaps %xmm1, -0x10(%rdi)
766
767 movaps -0x27(%rsi), %xmm3
768 palignr $7, %xmm3, %xmm2
769 movaps %xmm2, -0x20(%rdi)
770
771 movaps -0x37(%rsi), %xmm4
772 palignr $7, %xmm4, %xmm3
773 movaps %xmm3, -0x30(%rdi)
774
775 movaps -0x47(%rsi), %xmm5
776 palignr $7, %xmm5, %xmm4
777 movaps %xmm4, -0x40(%rdi)
778
779 movaps -0x57(%rsi), %xmm6
780 palignr $7, %xmm6, %xmm5
781 movaps %xmm5, -0x50(%rdi)
782
783 movaps -0x67(%rsi), %xmm7
784 palignr $7, %xmm7, %xmm6
785 movaps %xmm6, -0x60(%rdi)
786
787 movaps -0x77(%rsi), %xmm8
788 palignr $7, %xmm8, %xmm7
789 movaps %xmm7, -0x70(%rdi)
790
791 movaps -0x87(%rsi), %xmm9
792 palignr $7, %xmm9, %xmm8
793 movaps %xmm8, -0x80(%rdi)
794
795 sub $0x80, %rdx
796 lea -0x80(%rdi), %rdi
797 lea -0x80(%rsi), %rsi
798 jae L(shl_7_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
799 movdqu %xmm0, (%r8)
800 add $0x80, %rdx
801 sub %rdx, %rdi
802 sub %rdx, %rsi
803 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
804
/* L(shl_8): forward 128-byte-per-iteration loop for a source that is
   8 bytes past a 16-byte boundary (the movaps loads at -0x08(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $8
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
805 ALIGN (4)
806L(shl_8):
807 sub $0x80, %rdx
808 movaps -0x08(%rsi), %xmm1
809 movaps 0x08(%rsi), %xmm2
810 movaps 0x18(%rsi), %xmm3
811 movaps 0x28(%rsi), %xmm4
812 movaps 0x38(%rsi), %xmm5
813 movaps 0x48(%rsi), %xmm6
814 movaps 0x58(%rsi), %xmm7
815 movaps 0x68(%rsi), %xmm8
816 movaps 0x78(%rsi), %xmm9
817 lea 0x80(%rsi), %rsi
818 palignr $8, %xmm8, %xmm9
819 movaps %xmm9, 0x70(%rdi)
820 palignr $8, %xmm7, %xmm8
821 movaps %xmm8, 0x60(%rdi)
822 palignr $8, %xmm6, %xmm7
823 movaps %xmm7, 0x50(%rdi)
824 palignr $8, %xmm5, %xmm6
825 movaps %xmm6, 0x40(%rdi)
826 palignr $8, %xmm4, %xmm5
827 movaps %xmm5, 0x30(%rdi)
828 palignr $8, %xmm3, %xmm4
829 movaps %xmm4, 0x20(%rdi)
830 palignr $8, %xmm2, %xmm3
831 movaps %xmm3, 0x10(%rdi)
832 palignr $8, %xmm1, %xmm2
833 movaps %xmm2, (%rdi)
834 lea 0x80(%rdi), %rdi
835 jae L(shl_8)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
836 movdqu %xmm0, (%r8)
837 add $0x80, %rdx
838 add %rdx, %rdi
839 add %rdx, %rsi
840 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
841
/* L(shl_8_bwd): backward 128-byte-per-iteration loop for a source end
   that is 8 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $8 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
842 ALIGN (4)
843L(shl_8_bwd):
844 movaps -0x08(%rsi), %xmm1
845
846 movaps -0x18(%rsi), %xmm2
847 palignr $8, %xmm2, %xmm1
848 movaps %xmm1, -0x10(%rdi)
849
850 movaps -0x28(%rsi), %xmm3
851 palignr $8, %xmm3, %xmm2
852 movaps %xmm2, -0x20(%rdi)
853
854 movaps -0x38(%rsi), %xmm4
855 palignr $8, %xmm4, %xmm3
856 movaps %xmm3, -0x30(%rdi)
857
858 movaps -0x48(%rsi), %xmm5
859 palignr $8, %xmm5, %xmm4
860 movaps %xmm4, -0x40(%rdi)
861
862 movaps -0x58(%rsi), %xmm6
863 palignr $8, %xmm6, %xmm5
864 movaps %xmm5, -0x50(%rdi)
865
866 movaps -0x68(%rsi), %xmm7
867 palignr $8, %xmm7, %xmm6
868 movaps %xmm6, -0x60(%rdi)
869
870 movaps -0x78(%rsi), %xmm8
871 palignr $8, %xmm8, %xmm7
872 movaps %xmm7, -0x70(%rdi)
873
874 movaps -0x88(%rsi), %xmm9
875 palignr $8, %xmm9, %xmm8
876 movaps %xmm8, -0x80(%rdi)
877
878 sub $0x80, %rdx
879 lea -0x80(%rdi), %rdi
880 lea -0x80(%rsi), %rsi
881 jae L(shl_8_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  Only this backward loop labels its
    tail; no reference to the label is visible in this excerpt.  */
882L(shl_8_end_bwd):
883 movdqu %xmm0, (%r8)
884 add $0x80, %rdx
885 sub %rdx, %rdi
886 sub %rdx, %rsi
887 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
888
/* L(shl_9): forward 128-byte-per-iteration loop for a source that is
   9 bytes past a 16-byte boundary (the movaps loads at -0x09(%rsi) need
   that; presumably entered via L(shl_table_fwd), which is outside this
   excerpt).  Nine aligned loads are merged pairwise with palignr $9
   into the eight blocks stored to (%rdi).  The jae at the bottom tests
   CF from the sub at the top: movaps, palignr and lea do not modify the
   flags.  */
889 ALIGN (4)
890L(shl_9):
891 sub $0x80, %rdx
892 movaps -0x09(%rsi), %xmm1
893 movaps 0x07(%rsi), %xmm2
894 movaps 0x17(%rsi), %xmm3
895 movaps 0x27(%rsi), %xmm4
896 movaps 0x37(%rsi), %xmm5
897 movaps 0x47(%rsi), %xmm6
898 movaps 0x57(%rsi), %xmm7
899 movaps 0x67(%rsi), %xmm8
900 movaps 0x77(%rsi), %xmm9
901 lea 0x80(%rsi), %rsi
902 palignr $9, %xmm8, %xmm9
903 movaps %xmm9, 0x70(%rdi)
904 palignr $9, %xmm7, %xmm8
905 movaps %xmm8, 0x60(%rdi)
906 palignr $9, %xmm6, %xmm7
907 movaps %xmm7, 0x50(%rdi)
908 palignr $9, %xmm5, %xmm6
909 movaps %xmm6, 0x40(%rdi)
910 palignr $9, %xmm4, %xmm5
911 movaps %xmm5, 0x30(%rdi)
912 palignr $9, %xmm3, %xmm4
913 movaps %xmm4, 0x20(%rdi)
914 palignr $9, %xmm2, %xmm3
915 movaps %xmm3, 0x10(%rdi)
916 palignr $9, %xmm1, %xmm2
917 movaps %xmm2, (%rdi)
918 lea 0x80(%rdi), %rdi
919 jae L(shl_9)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
920 movdqu %xmm0, (%r8)
921 add $0x80, %rdx
922 add %rdx, %rdi
923 add %rdx, %rsi
924 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
925
/* L(shl_9_bwd): backward 128-byte-per-iteration loop for a source end
   that is 9 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $9 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
926 ALIGN (4)
927L(shl_9_bwd):
928 movaps -0x09(%rsi), %xmm1
929
930 movaps -0x19(%rsi), %xmm2
931 palignr $9, %xmm2, %xmm1
932 movaps %xmm1, -0x10(%rdi)
933
934 movaps -0x29(%rsi), %xmm3
935 palignr $9, %xmm3, %xmm2
936 movaps %xmm2, -0x20(%rdi)
937
938 movaps -0x39(%rsi), %xmm4
939 palignr $9, %xmm4, %xmm3
940 movaps %xmm3, -0x30(%rdi)
941
942 movaps -0x49(%rsi), %xmm5
943 palignr $9, %xmm5, %xmm4
944 movaps %xmm4, -0x40(%rdi)
945
946 movaps -0x59(%rsi), %xmm6
947 palignr $9, %xmm6, %xmm5
948 movaps %xmm5, -0x50(%rdi)
949
950 movaps -0x69(%rsi), %xmm7
951 palignr $9, %xmm7, %xmm6
952 movaps %xmm6, -0x60(%rdi)
953
954 movaps -0x79(%rsi), %xmm8
955 palignr $9, %xmm8, %xmm7
956 movaps %xmm7, -0x70(%rdi)
957
958 movaps -0x89(%rsi), %xmm9
959 palignr $9, %xmm9, %xmm8
960 movaps %xmm8, -0x80(%rdi)
961
962 sub $0x80, %rdx
963 lea -0x80(%rdi), %rdi
964 lea -0x80(%rsi), %rsi
965 jae L(shl_9_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
966 movdqu %xmm0, (%r8)
967 add $0x80, %rdx
968 sub %rdx, %rdi
969 sub %rdx, %rsi
970 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
971
/* L(shl_10): forward 128-byte-per-iteration loop for a source that is
   10 bytes past a 16-byte boundary (the movaps loads at -0x0a(%rsi)
   need that; presumably entered via L(shl_table_fwd), which is outside
   this excerpt).  Nine aligned loads are merged pairwise with
   palignr $10 into the eight blocks stored to (%rdi).  The jae at the
   bottom tests CF from the sub at the top: movaps, palignr and lea do
   not modify the flags.  */
972 ALIGN (4)
973L(shl_10):
974 sub $0x80, %rdx
975 movaps -0x0a(%rsi), %xmm1
976 movaps 0x06(%rsi), %xmm2
977 movaps 0x16(%rsi), %xmm3
978 movaps 0x26(%rsi), %xmm4
979 movaps 0x36(%rsi), %xmm5
980 movaps 0x46(%rsi), %xmm6
981 movaps 0x56(%rsi), %xmm7
982 movaps 0x66(%rsi), %xmm8
983 movaps 0x76(%rsi), %xmm9
984 lea 0x80(%rsi), %rsi
985 palignr $10, %xmm8, %xmm9
986 movaps %xmm9, 0x70(%rdi)
987 palignr $10, %xmm7, %xmm8
988 movaps %xmm8, 0x60(%rdi)
989 palignr $10, %xmm6, %xmm7
990 movaps %xmm7, 0x50(%rdi)
991 palignr $10, %xmm5, %xmm6
992 movaps %xmm6, 0x40(%rdi)
993 palignr $10, %xmm4, %xmm5
994 movaps %xmm5, 0x30(%rdi)
995 palignr $10, %xmm3, %xmm4
996 movaps %xmm4, 0x20(%rdi)
997 palignr $10, %xmm2, %xmm3
998 movaps %xmm3, 0x10(%rdi)
999 palignr $10, %xmm1, %xmm2
1000 movaps %xmm2, (%rdi)
1001 lea 0x80(%rdi), %rdi
1002 jae L(shl_10)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
1003 movdqu %xmm0, (%r8)
1004 add $0x80, %rdx
1005 add %rdx, %rdi
1006 add %rdx, %rsi
1007 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1008
/* L(shl_10_bwd): backward 128-byte-per-iteration loop for a source end
   that is 10 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $10 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
1009 ALIGN (4)
1010L(shl_10_bwd):
1011 movaps -0x0a(%rsi), %xmm1
1012
1013 movaps -0x1a(%rsi), %xmm2
1014 palignr $10, %xmm2, %xmm1
1015 movaps %xmm1, -0x10(%rdi)
1016
1017 movaps -0x2a(%rsi), %xmm3
1018 palignr $10, %xmm3, %xmm2
1019 movaps %xmm2, -0x20(%rdi)
1020
1021 movaps -0x3a(%rsi), %xmm4
1022 palignr $10, %xmm4, %xmm3
1023 movaps %xmm3, -0x30(%rdi)
1024
1025 movaps -0x4a(%rsi), %xmm5
1026 palignr $10, %xmm5, %xmm4
1027 movaps %xmm4, -0x40(%rdi)
1028
1029 movaps -0x5a(%rsi), %xmm6
1030 palignr $10, %xmm6, %xmm5
1031 movaps %xmm5, -0x50(%rdi)
1032
1033 movaps -0x6a(%rsi), %xmm7
1034 palignr $10, %xmm7, %xmm6
1035 movaps %xmm6, -0x60(%rdi)
1036
1037 movaps -0x7a(%rsi), %xmm8
1038 palignr $10, %xmm8, %xmm7
1039 movaps %xmm7, -0x70(%rdi)
1040
1041 movaps -0x8a(%rsi), %xmm9
1042 palignr $10, %xmm9, %xmm8
1043 movaps %xmm8, -0x80(%rdi)
1044
1045 sub $0x80, %rdx
1046 lea -0x80(%rdi), %rdi
1047 lea -0x80(%rsi), %rsi
1048 jae L(shl_10_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
1049 movdqu %xmm0, (%r8)
1050 add $0x80, %rdx
1051 sub %rdx, %rdi
1052 sub %rdx, %rsi
1053 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1054
/* L(shl_11): forward 128-byte-per-iteration loop for a source that is
   11 bytes past a 16-byte boundary (the movaps loads at -0x0b(%rsi)
   need that; presumably entered via L(shl_table_fwd), which is outside
   this excerpt).  Nine aligned loads are merged pairwise with
   palignr $11 into the eight blocks stored to (%rdi).  The jae at the
   bottom tests CF from the sub at the top: movaps, palignr and lea do
   not modify the flags.  */
1055 ALIGN (4)
1056L(shl_11):
1057 sub $0x80, %rdx
1058 movaps -0x0b(%rsi), %xmm1
1059 movaps 0x05(%rsi), %xmm2
1060 movaps 0x15(%rsi), %xmm3
1061 movaps 0x25(%rsi), %xmm4
1062 movaps 0x35(%rsi), %xmm5
1063 movaps 0x45(%rsi), %xmm6
1064 movaps 0x55(%rsi), %xmm7
1065 movaps 0x65(%rsi), %xmm8
1066 movaps 0x75(%rsi), %xmm9
1067 lea 0x80(%rsi), %rsi
1068 palignr $11, %xmm8, %xmm9
1069 movaps %xmm9, 0x70(%rdi)
1070 palignr $11, %xmm7, %xmm8
1071 movaps %xmm8, 0x60(%rdi)
1072 palignr $11, %xmm6, %xmm7
1073 movaps %xmm7, 0x50(%rdi)
1074 palignr $11, %xmm5, %xmm6
1075 movaps %xmm6, 0x40(%rdi)
1076 palignr $11, %xmm4, %xmm5
1077 movaps %xmm5, 0x30(%rdi)
1078 palignr $11, %xmm3, %xmm4
1079 movaps %xmm4, 0x20(%rdi)
1080 palignr $11, %xmm2, %xmm3
1081 movaps %xmm3, 0x10(%rdi)
1082 palignr $11, %xmm1, %xmm2
1083 movaps %xmm2, (%rdi)
1084 lea 0x80(%rdi), %rdi
1085 jae L(shl_11)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
1086 movdqu %xmm0, (%r8)
1087 add $0x80, %rdx
1088 add %rdx, %rdi
1089 add %rdx, %rsi
1090 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1091
/* L(shl_11_bwd): backward 128-byte-per-iteration loop for a source end
   that is 11 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $11 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
1092 ALIGN (4)
1093L(shl_11_bwd):
1094 movaps -0x0b(%rsi), %xmm1
1095
1096 movaps -0x1b(%rsi), %xmm2
1097 palignr $11, %xmm2, %xmm1
1098 movaps %xmm1, -0x10(%rdi)
1099
1100 movaps -0x2b(%rsi), %xmm3
1101 palignr $11, %xmm3, %xmm2
1102 movaps %xmm2, -0x20(%rdi)
1103
1104 movaps -0x3b(%rsi), %xmm4
1105 palignr $11, %xmm4, %xmm3
1106 movaps %xmm3, -0x30(%rdi)
1107
1108 movaps -0x4b(%rsi), %xmm5
1109 palignr $11, %xmm5, %xmm4
1110 movaps %xmm4, -0x40(%rdi)
1111
1112 movaps -0x5b(%rsi), %xmm6
1113 palignr $11, %xmm6, %xmm5
1114 movaps %xmm5, -0x50(%rdi)
1115
1116 movaps -0x6b(%rsi), %xmm7
1117 palignr $11, %xmm7, %xmm6
1118 movaps %xmm6, -0x60(%rdi)
1119
1120 movaps -0x7b(%rsi), %xmm8
1121 palignr $11, %xmm8, %xmm7
1122 movaps %xmm7, -0x70(%rdi)
1123
1124 movaps -0x8b(%rsi), %xmm9
1125 palignr $11, %xmm9, %xmm8
1126 movaps %xmm8, -0x80(%rdi)
1127
1128 sub $0x80, %rdx
1129 lea -0x80(%rdi), %rdi
1130 lea -0x80(%rsi), %rsi
1131 jae L(shl_11_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
1132 movdqu %xmm0, (%r8)
1133 add $0x80, %rdx
1134 sub %rdx, %rdi
1135 sub %rdx, %rsi
1136 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1137
/* L(shl_12): forward 128-byte-per-iteration loop for a source that is
   12 bytes past a 16-byte boundary (the aligned loads at -0x0c(%rsi)
   need that; presumably entered via L(shl_table_fwd), which is outside
   this excerpt).  Nine aligned loads are merged pairwise with
   palignr $12 into the eight blocks stored to (%rdi).  The jae at the
   bottom tests CF from the sub at the top: the SSE moves, palignr and
   lea do not modify the flags.  */
1138 ALIGN (4)
1139L(shl_12):
1140 sub $0x80, %rdx
 /* NOTE(review): the first load uses movdqa while every other load in
    this loop uses movaps; both are aligned 16-byte loads -- confirm
    whether the different mnemonic is deliberate.  */
1141 movdqa -0x0c(%rsi), %xmm1
1142 movaps 0x04(%rsi), %xmm2
1143 movaps 0x14(%rsi), %xmm3
1144 movaps 0x24(%rsi), %xmm4
1145 movaps 0x34(%rsi), %xmm5
1146 movaps 0x44(%rsi), %xmm6
1147 movaps 0x54(%rsi), %xmm7
1148 movaps 0x64(%rsi), %xmm8
1149 movaps 0x74(%rsi), %xmm9
1150 lea 0x80(%rsi), %rsi
1151 palignr $12, %xmm8, %xmm9
1152 movaps %xmm9, 0x70(%rdi)
1153 palignr $12, %xmm7, %xmm8
1154 movaps %xmm8, 0x60(%rdi)
1155 palignr $12, %xmm6, %xmm7
1156 movaps %xmm7, 0x50(%rdi)
1157 palignr $12, %xmm5, %xmm6
1158 movaps %xmm6, 0x40(%rdi)
1159 palignr $12, %xmm4, %xmm5
1160 movaps %xmm5, 0x30(%rdi)
1161 palignr $12, %xmm3, %xmm4
1162 movaps %xmm4, 0x20(%rdi)
1163 palignr $12, %xmm2, %xmm3
1164 movaps %xmm3, 0x10(%rdi)
1165 palignr $12, %xmm1, %xmm2
1166 movaps %xmm2, (%rdi)
1167
1168 lea 0x80(%rdi), %rdi
1169 jae L(shl_12)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
1170 movdqu %xmm0, (%r8)
1171 add $0x80, %rdx
1172 add %rdx, %rdi
1173 add %rdx, %rsi
1174 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1175
/* L(shl_12_bwd): backward 128-byte-per-iteration loop for a source end
   that is 12 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $12 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
1176 ALIGN (4)
1177L(shl_12_bwd):
1178 movaps -0x0c(%rsi), %xmm1
1179
1180 movaps -0x1c(%rsi), %xmm2
1181 palignr $12, %xmm2, %xmm1
1182 movaps %xmm1, -0x10(%rdi)
1183
1184 movaps -0x2c(%rsi), %xmm3
1185 palignr $12, %xmm3, %xmm2
1186 movaps %xmm2, -0x20(%rdi)
1187
1188 movaps -0x3c(%rsi), %xmm4
1189 palignr $12, %xmm4, %xmm3
1190 movaps %xmm3, -0x30(%rdi)
1191
1192 movaps -0x4c(%rsi), %xmm5
1193 palignr $12, %xmm5, %xmm4
1194 movaps %xmm4, -0x40(%rdi)
1195
1196 movaps -0x5c(%rsi), %xmm6
1197 palignr $12, %xmm6, %xmm5
1198 movaps %xmm5, -0x50(%rdi)
1199
1200 movaps -0x6c(%rsi), %xmm7
1201 palignr $12, %xmm7, %xmm6
1202 movaps %xmm6, -0x60(%rdi)
1203
1204 movaps -0x7c(%rsi), %xmm8
1205 palignr $12, %xmm8, %xmm7
1206 movaps %xmm7, -0x70(%rdi)
1207
1208 movaps -0x8c(%rsi), %xmm9
1209 palignr $12, %xmm9, %xmm8
1210 movaps %xmm8, -0x80(%rdi)
1211
1212 sub $0x80, %rdx
1213 lea -0x80(%rdi), %rdi
1214 lea -0x80(%rsi), %rsi
1215 jae L(shl_12_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
1216 movdqu %xmm0, (%r8)
1217 add $0x80, %rdx
1218 sub %rdx, %rdi
1219 sub %rdx, %rsi
1220 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1221
/* L(shl_13): forward 128-byte-per-iteration loop for a source that is
   13 bytes past a 16-byte boundary (the movaps loads at -0x0d(%rsi)
   need that; presumably entered via L(shl_table_fwd), which is outside
   this excerpt).  Nine aligned loads are merged pairwise with
   palignr $13 into the eight blocks stored to (%rdi).  The jae at the
   bottom tests CF from the sub at the top: movaps, palignr and lea do
   not modify the flags.  */
1222 ALIGN (4)
1223L(shl_13):
1224 sub $0x80, %rdx
1225 movaps -0x0d(%rsi), %xmm1
1226 movaps 0x03(%rsi), %xmm2
1227 movaps 0x13(%rsi), %xmm3
1228 movaps 0x23(%rsi), %xmm4
1229 movaps 0x33(%rsi), %xmm5
1230 movaps 0x43(%rsi), %xmm6
1231 movaps 0x53(%rsi), %xmm7
1232 movaps 0x63(%rsi), %xmm8
1233 movaps 0x73(%rsi), %xmm9
1234 lea 0x80(%rsi), %rsi
1235 palignr $13, %xmm8, %xmm9
1236 movaps %xmm9, 0x70(%rdi)
1237 palignr $13, %xmm7, %xmm8
1238 movaps %xmm8, 0x60(%rdi)
1239 palignr $13, %xmm6, %xmm7
1240 movaps %xmm7, 0x50(%rdi)
1241 palignr $13, %xmm5, %xmm6
1242 movaps %xmm6, 0x40(%rdi)
1243 palignr $13, %xmm4, %xmm5
1244 movaps %xmm5, 0x30(%rdi)
1245 palignr $13, %xmm3, %xmm4
1246 movaps %xmm4, 0x20(%rdi)
1247 palignr $13, %xmm2, %xmm3
1248 movaps %xmm3, 0x10(%rdi)
1249 palignr $13, %xmm1, %xmm2
1250 movaps %xmm2, (%rdi)
1251 lea 0x80(%rdi), %rdi
1252 jae L(shl_13)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, advance both pointers to the end of the buffers
    and dispatch on the count.  */
1253 movdqu %xmm0, (%r8)
1254 add $0x80, %rdx
1255 add %rdx, %rdi
1256 add %rdx, %rsi
1257 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1258
/* L(shl_13_bwd): backward 128-byte-per-iteration loop for a source end
   that is 13 bytes past a 16-byte boundary (presumably entered via
   L(shl_table_bwd), which is outside this excerpt).  Nine aligned loads
   below %rsi are merged pairwise with palignr $13 into the eight blocks
   stored below %rdi.  lea leaves CF from the preceding sub intact for
   the jae.  */
1259 ALIGN (4)
1260L(shl_13_bwd):
1261 movaps -0x0d(%rsi), %xmm1
1262
1263 movaps -0x1d(%rsi), %xmm2
1264 palignr $13, %xmm2, %xmm1
1265 movaps %xmm1, -0x10(%rdi)
1266
1267 movaps -0x2d(%rsi), %xmm3
1268 palignr $13, %xmm3, %xmm2
1269 movaps %xmm2, -0x20(%rdi)
1270
1271 movaps -0x3d(%rsi), %xmm4
1272 palignr $13, %xmm4, %xmm3
1273 movaps %xmm3, -0x30(%rdi)
1274
1275 movaps -0x4d(%rsi), %xmm5
1276 palignr $13, %xmm5, %xmm4
1277 movaps %xmm4, -0x40(%rdi)
1278
1279 movaps -0x5d(%rsi), %xmm6
1280 palignr $13, %xmm6, %xmm5
1281 movaps %xmm5, -0x50(%rdi)
1282
1283 movaps -0x6d(%rsi), %xmm7
1284 palignr $13, %xmm7, %xmm6
1285 movaps %xmm6, -0x60(%rdi)
1286
1287 movaps -0x7d(%rsi), %xmm8
1288 palignr $13, %xmm8, %xmm7
1289 movaps %xmm7, -0x70(%rdi)
1290
1291 movaps -0x8d(%rsi), %xmm9
1292 palignr $13, %xmm9, %xmm8
1293 movaps %xmm8, -0x80(%rdi)
1294
1295 sub $0x80, %rdx
1296 lea -0x80(%rdi), %rdi
1297 lea -0x80(%rsi), %rsi
1298 jae L(shl_13_bwd)
 /* Loop finished: write the block saved in %xmm0 to (%r8), restore the
    residual byte count, move both pointers down to the start of what is
    left and dispatch on the count.  */
1299 movdqu %xmm0, (%r8)
1300 add $0x80, %rdx
1301 sub %rdx, %rdi
1302 sub %rdx, %rsi
1303 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1304
/* Forward loop, 0x80 bytes per iteration, for a source that lies 14
   bytes past a 16-byte boundary.  Every load and store is a movaps, so
   %rsi - 14 and %rdi must both be 16-byte aligned.  %rdx is the running
   byte count.  %xmm0 and %r8 are prepared before this code is reached
   (not visible here); NOTE(review): presumably the saved unaligned head
   of the copy and the address it belongs at - confirm.  */
1305 ALIGN (4)
1306L(shl_14):
     /* Sets CF for the jae at the loop bottom; nothing in between
        modifies the flags.  */
1307 sub $0x80, %rdx
     /* Nine aligned loads covering source offsets -0x0e .. 0x81.  */
1308 movaps -0x0e(%rsi), %xmm1
1309 movaps 0x02(%rsi), %xmm2
1310 movaps 0x12(%rsi), %xmm3
1311 movaps 0x22(%rsi), %xmm4
1312 movaps 0x32(%rsi), %xmm5
1313 movaps 0x42(%rsi), %xmm6
1314 movaps 0x52(%rsi), %xmm7
1315 movaps 0x62(%rsi), %xmm8
1316 movaps 0x72(%rsi), %xmm9
1317 lea 0x80(%rsi), %rsi
     /* palignr $14 keeps bytes 14..29 of each adjacent register pair;
        pairs are processed top-down so each low half is still intact
        when it is consumed.  */
1318 palignr $14, %xmm8, %xmm9
1319 movaps %xmm9, 0x70(%rdi)
1320 palignr $14, %xmm7, %xmm8
1321 movaps %xmm8, 0x60(%rdi)
1322 palignr $14, %xmm6, %xmm7
1323 movaps %xmm7, 0x50(%rdi)
1324 palignr $14, %xmm5, %xmm6
1325 movaps %xmm6, 0x40(%rdi)
1326 palignr $14, %xmm4, %xmm5
1327 movaps %xmm5, 0x30(%rdi)
1328 palignr $14, %xmm3, %xmm4
1329 movaps %xmm4, 0x20(%rdi)
1330 palignr $14, %xmm2, %xmm3
1331 movaps %xmm3, 0x10(%rdi)
1332 palignr $14, %xmm1, %xmm2
1333 movaps %xmm2, (%rdi)
1334 lea 0x80(%rdi), %rdi
1335 jae L(shl_14)
     /* Loop done: store the saved vector, recover the residual count,
        advance both pointers to the end of the region and dispatch.  */
1336 movdqu %xmm0, (%r8)
1337 add $0x80, %rdx
1338 add %rdx, %rdi
1339 add %rdx, %rsi
1340 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1341
/* Backward loop, 0x80 bytes per iteration, for a source that lies 14
   bytes past a 16-byte boundary.  %rsi and %rdi point just above the
   block to copy and move downwards.  All accesses are movaps, so
   %rsi - 14 and %rdi must be 16-byte aligned.  %xmm0 and %r8 are set up
   before this code is reached (not visible here); NOTE(review):
   presumably the saved unaligned last 16 bytes and their destination,
   as prepared in L(gobble_mem_bwd) - confirm.  */
1342 ALIGN (4)
1343L(shl_14_bwd):
1344 movaps -0x0e(%rsi), %xmm1
1345
     /* Each step loads the next lower aligned vector and uses palignr
        $14 to extract the 16 source bytes for the matching slot.  */
1346 movaps -0x1e(%rsi), %xmm2
1347 palignr $14, %xmm2, %xmm1
1348 movaps %xmm1, -0x10(%rdi)
1349
1350 movaps -0x2e(%rsi), %xmm3
1351 palignr $14, %xmm3, %xmm2
1352 movaps %xmm2, -0x20(%rdi)
1353
1354 movaps -0x3e(%rsi), %xmm4
1355 palignr $14, %xmm4, %xmm3
1356 movaps %xmm3, -0x30(%rdi)
1357
1358 movaps -0x4e(%rsi), %xmm5
1359 palignr $14, %xmm5, %xmm4
1360 movaps %xmm4, -0x40(%rdi)
1361
1362 movaps -0x5e(%rsi), %xmm6
1363 palignr $14, %xmm6, %xmm5
1364 movaps %xmm5, -0x50(%rdi)
1365
1366 movaps -0x6e(%rsi), %xmm7
1367 palignr $14, %xmm7, %xmm6
1368 movaps %xmm6, -0x60(%rdi)
1369
1370 movaps -0x7e(%rsi), %xmm8
1371 palignr $14, %xmm8, %xmm7
1372 movaps %xmm7, -0x70(%rdi)
1373
1374 movaps -0x8e(%rsi), %xmm9
1375 palignr $14, %xmm9, %xmm8
1376 movaps %xmm8, -0x80(%rdi)
1377
     /* The two lea instructions keep the flags from this sub intact for
        the jae.  */
1378 sub $0x80, %rdx
1379 lea -0x80(%rdi), %rdi
1380 lea -0x80(%rsi), %rsi
1381 jae L(shl_14_bwd)
     /* Loop done: store the saved vector, recover the residual count,
        move both pointers down to the start of the region and
        dispatch.  */
1382 movdqu %xmm0, (%r8)
1383 add $0x80, %rdx
1384 sub %rdx, %rdi
1385 sub %rdx, %rsi
1386 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1387
/* Forward loop, 0x80 bytes per iteration, for a source that lies 15
   bytes past a 16-byte boundary.  Every load and store is a movaps, so
   %rsi - 15 and %rdi must both be 16-byte aligned.  %rdx is the running
   byte count.  %xmm0 and %r8 are prepared before this code is reached
   (not visible here); NOTE(review): presumably the saved unaligned head
   of the copy and the address it belongs at - confirm.  */
1388 ALIGN (4)
1389L(shl_15):
     /* Sets CF for the jae at the loop bottom; nothing in between
        modifies the flags.  */
1390 sub $0x80, %rdx
     /* Nine aligned loads covering source offsets -0x0f .. 0x80.  */
1391 movaps -0x0f(%rsi), %xmm1
1392 movaps 0x01(%rsi), %xmm2
1393 movaps 0x11(%rsi), %xmm3
1394 movaps 0x21(%rsi), %xmm4
1395 movaps 0x31(%rsi), %xmm5
1396 movaps 0x41(%rsi), %xmm6
1397 movaps 0x51(%rsi), %xmm7
1398 movaps 0x61(%rsi), %xmm8
1399 movaps 0x71(%rsi), %xmm9
1400 lea 0x80(%rsi), %rsi
     /* palignr $15 keeps bytes 15..30 of each adjacent register pair;
        pairs are processed top-down so each low half is still intact
        when it is consumed.  */
1401 palignr $15, %xmm8, %xmm9
1402 movaps %xmm9, 0x70(%rdi)
1403 palignr $15, %xmm7, %xmm8
1404 movaps %xmm8, 0x60(%rdi)
1405 palignr $15, %xmm6, %xmm7
1406 movaps %xmm7, 0x50(%rdi)
1407 palignr $15, %xmm5, %xmm6
1408 movaps %xmm6, 0x40(%rdi)
1409 palignr $15, %xmm4, %xmm5
1410 movaps %xmm5, 0x30(%rdi)
1411 palignr $15, %xmm3, %xmm4
1412 movaps %xmm4, 0x20(%rdi)
1413 palignr $15, %xmm2, %xmm3
1414 movaps %xmm3, 0x10(%rdi)
1415 palignr $15, %xmm1, %xmm2
1416 movaps %xmm2, (%rdi)
1417 lea 0x80(%rdi), %rdi
1418 jae L(shl_15)
     /* Loop done: store the saved vector, recover the residual count,
        advance both pointers to the end of the region and dispatch.  */
1419 movdqu %xmm0, (%r8)
1420 add $0x80, %rdx
1421 add %rdx, %rdi
1422 add %rdx, %rsi
1423 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1424
/* Backward loop, 0x80 bytes per iteration, for a source that lies 15
   bytes past a 16-byte boundary.  %rsi and %rdi point just above the
   block to copy and move downwards.  All accesses are movaps, so
   %rsi - 15 and %rdi must be 16-byte aligned.  %xmm0 and %r8 are set up
   before this code is reached (not visible here); NOTE(review):
   presumably the saved unaligned last 16 bytes and their destination,
   as prepared in L(gobble_mem_bwd) - confirm.  */
1425 ALIGN (4)
1426L(shl_15_bwd):
1427 movaps -0x0f(%rsi), %xmm1
1428
     /* Each step loads the next lower aligned vector and uses palignr
        $15 to extract the 16 source bytes for the matching slot.  */
1429 movaps -0x1f(%rsi), %xmm2
1430 palignr $15, %xmm2, %xmm1
1431 movaps %xmm1, -0x10(%rdi)
1432
1433 movaps -0x2f(%rsi), %xmm3
1434 palignr $15, %xmm3, %xmm2
1435 movaps %xmm2, -0x20(%rdi)
1436
1437 movaps -0x3f(%rsi), %xmm4
1438 palignr $15, %xmm4, %xmm3
1439 movaps %xmm3, -0x30(%rdi)
1440
1441 movaps -0x4f(%rsi), %xmm5
1442 palignr $15, %xmm5, %xmm4
1443 movaps %xmm4, -0x40(%rdi)
1444
1445 movaps -0x5f(%rsi), %xmm6
1446 palignr $15, %xmm6, %xmm5
1447 movaps %xmm5, -0x50(%rdi)
1448
1449 movaps -0x6f(%rsi), %xmm7
1450 palignr $15, %xmm7, %xmm6
1451 movaps %xmm6, -0x60(%rdi)
1452
1453 movaps -0x7f(%rsi), %xmm8
1454 palignr $15, %xmm8, %xmm7
1455 movaps %xmm7, -0x70(%rdi)
1456
1457 movaps -0x8f(%rsi), %xmm9
1458 palignr $15, %xmm9, %xmm8
1459 movaps %xmm8, -0x80(%rdi)
1460
     /* The two lea instructions keep the flags from this sub intact for
        the jae.  */
1461 sub $0x80, %rdx
1462 lea -0x80(%rdi), %rdi
1463 lea -0x80(%rsi), %rsi
1464 jae L(shl_15_bwd)
     /* Loop done: store the saved vector, recover the residual count,
        move both pointers down to the start of the region and
        dispatch.  */
1465 movdqu %xmm0, (%r8)
1466 add $0x80, %rdx
1467 sub %rdx, %rdi
1468 sub %rdx, %rsi
1469 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1470
/* Bulk forward copy.  %rsi = source, %rdi = destination (the movdqa
   stores below require it to be 16-byte aligned), %rdx = byte count.
   %xmm0 and %r8 are prepared before this code is reached (not visible
   here); NOTE(review): presumably the original first 16 source bytes
   and the original destination - confirm.
   The length is split using half the shared cache size (%rcx): one
   part is written with non-temporal stores (movntdq), the other with
   ordinary stores (movdqa).  Whatever is left after the 0x80-byte loops
   is handed to the forward tail table.  */
1471 ALIGN (4)
1472L(gobble_mem_fwd):
     /* The load from (%rsi) is issued before the store through %r8.
        Copy one 16-byte chunk and step past it.  */
1473 movdqu (%rsi), %xmm1
1474 movdqu %xmm0, (%r8)
1475 movdqa %xmm1, (%rdi)
1476 sub $16, %rdx
1477 add $16, %rsi
1478 add $16, %rdi
1479
     /* %RCX_LP (defined outside this excerpt) receives half the shared
        cache size, either a build-time constant or the runtime
        variable.  */
1480#ifdef SHARED_CACHE_SIZE_HALF
6d2850e7 1481 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
6fb8cbcb 1482#else
afec409a 1483 mov __x86_shared_cache_size_half(%rip), %RCX_LP
6fb8cbcb
L
1484#endif
1485#ifdef USE_AS_MEMMOVE
     /* %r9 = src - dst (unsigned).  If it is at least the length,
        continue exactly as for memcpy.  Otherwise, if it does not
        exceed %rcx, copy the whole length with the ordinary-store
        loop.  */
1486 mov %rsi, %r9
1487 sub %rdi, %r9
1488 cmp %rdx, %r9
1489 jae L(memmove_is_memcpy_fwd)
1490 cmp %rcx, %r9
1491 jbe L(ll_cache_copy_fwd_start)
1492L(memmove_is_memcpy_fwd):
1493#endif
     /* %rcx = min (length, %rcx); %rdx = length - %rcx.  */
1494 cmp %rcx, %rdx
1495 ja L(bigger_in_fwd)
1496 mov %rdx, %rcx
1497L(bigger_in_fwd):
1498 sub %rcx, %rdx
     /* At most 0x1000 bytes beyond %rcx: use ordinary stores for
        everything.  */
1499 cmp $0x1000, %rdx
1500 jbe L(ll_cache_copy_fwd)
1501
     /* Excess up to 8 * %rcx: two steps (%rdx bytes non-temporal, then
        %rcx bytes ordinary).  Larger: everything non-temporal, with
        %rcx cleared so the second step is skipped.  */
1502 mov %rcx, %r9
1503 shl $3, %r9
1504 cmp %r9, %rdx
1505 jbe L(2steps_copy_fwd)
1506 add %rcx, %rdx
1507 xor %rcx, %rcx
1508L(2steps_copy_fwd):
     /* Pre-bias the count by one iteration; the loop exits on the
        borrow from its own sub.  */
1509 sub $0x80, %rdx
1510L(gobble_mem_fwd_loop):
1511 sub $0x80, %rdx
1512 prefetcht0 0x200(%rsi)
1513 prefetcht0 0x300(%rsi)
1514 movdqu (%rsi), %xmm0
1515 movdqu 0x10(%rsi), %xmm1
1516 movdqu 0x20(%rsi), %xmm2
1517 movdqu 0x30(%rsi), %xmm3
1518 movdqu 0x40(%rsi), %xmm4
1519 movdqu 0x50(%rsi), %xmm5
1520 movdqu 0x60(%rsi), %xmm6
1521 movdqu 0x70(%rsi), %xmm7
     /* All eight loads are fenced before the non-temporal stores.  */
1522 lfence
1523 movntdq %xmm0, (%rdi)
1524 movntdq %xmm1, 0x10(%rdi)
1525 movntdq %xmm2, 0x20(%rdi)
1526 movntdq %xmm3, 0x30(%rdi)
1527 movntdq %xmm4, 0x40(%rdi)
1528 movntdq %xmm5, 0x50(%rdi)
1529 movntdq %xmm6, 0x60(%rdi)
1530 movntdq %xmm7, 0x70(%rdi)
1531 lea 0x80(%rsi), %rsi
1532 lea 0x80(%rdi), %rdi
     /* CF still comes from the sub at the loop top (SSE moves,
        prefetches, fences and lea leave the flags alone).  */
1533 jae L(gobble_mem_fwd_loop)
     /* Fence the non-temporal stores before continuing.  */
1534 sfence
     /* Skip the ordinary-store step when %rcx holds less than one full
        iteration.  */
1535 cmp $0x80, %rcx
1536 jb L(gobble_mem_fwd_end)
1537 add $0x80, %rdx
1538L(ll_cache_copy_fwd):
1539 add %rcx, %rdx
1540L(ll_cache_copy_fwd_start):
1541 sub $0x80, %rdx
1542L(gobble_ll_loop_fwd):
1543 prefetchnta 0x1c0(%rsi)
1544 prefetchnta 0x280(%rsi)
1545 prefetchnta 0x1c0(%rdi)
1546 prefetchnta 0x280(%rdi)
1547 sub $0x80, %rdx
1548 movdqu (%rsi), %xmm0
1549 movdqu 0x10(%rsi), %xmm1
1550 movdqu 0x20(%rsi), %xmm2
1551 movdqu 0x30(%rsi), %xmm3
1552 movdqu 0x40(%rsi), %xmm4
1553 movdqu 0x50(%rsi), %xmm5
1554 movdqu 0x60(%rsi), %xmm6
1555 movdqu 0x70(%rsi), %xmm7
     /* All loads of the iteration complete before the first store.  */
1556 movdqa %xmm0, (%rdi)
1557 movdqa %xmm1, 0x10(%rdi)
1558 movdqa %xmm2, 0x20(%rdi)
1559 movdqa %xmm3, 0x30(%rdi)
1560 movdqa %xmm4, 0x40(%rdi)
1561 movdqa %xmm5, 0x50(%rdi)
1562 movdqa %xmm6, 0x60(%rdi)
1563 movdqa %xmm7, 0x70(%rdi)
1564 lea 0x80(%rsi), %rsi
1565 lea 0x80(%rdi), %rdi
1566 jae L(gobble_ll_loop_fwd)
1567L(gobble_mem_fwd_end):
     /* Undo the last subtraction to get the residual count, advance
        both pointers to the end of the region and dispatch.  */
1568 add $0x80, %rdx
1569 add %rdx, %rsi
1570 add %rdx, %rdi
1571 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1572
/* Bulk backward copy.  On entry %rsi = source, %rdi = destination,
   %rdx = byte count.  Both pointers are first moved to the end of
   their regions.  The last 16 source bytes are saved in %xmm0 and
   their destination address in %r8; they are stored once both loops
   are done.  The end of the destination is then aligned down to 16
   bytes so the loops can use aligned stores.
   The length is split using half the shared cache size (%rcx): one
   part is written with non-temporal stores (movntdq), the other with
   ordinary stores (movdqa).  Whatever is left after the 0x80-byte loops
   is handed to the backward tail table.  */
1573 ALIGN (4)
1574L(gobble_mem_bwd):
1575 add %rdx, %rsi
1576 add %rdx, %rdi
1577
1578 movdqu -16(%rsi), %xmm0
1579 lea -16(%rdi), %r8
     /* %r9 = number of bytes by which the destination end exceeds a
        16-byte boundary; drop them from both pointers and the count
        (they are covered by the vector saved in %xmm0).  */
1580 mov %rdi, %r9
1581 and $-16, %rdi
1582 sub %rdi, %r9
1583 sub %r9, %rsi
1584 sub %r9, %rdx
1585
1586
     /* %RCX_LP (defined outside this excerpt) receives half the shared
        cache size, either a build-time constant or the runtime
        variable.  */
1587#ifdef SHARED_CACHE_SIZE_HALF
6d2850e7 1588 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
6fb8cbcb 1589#else
afec409a 1590 mov __x86_shared_cache_size_half(%rip), %RCX_LP
6fb8cbcb
L
1591#endif
1592#ifdef USE_AS_MEMMOVE
     /* %r9 = dst - src (unsigned).  If it is at least the length,
        continue exactly as for memcpy.  Otherwise, if it does not
        exceed %rcx, copy the whole length with the ordinary-store
        loop.  */
1593 mov %rdi, %r9
1594 sub %rsi, %r9
1595 cmp %rdx, %r9
1596 jae L(memmove_is_memcpy_bwd)
1597 cmp %rcx, %r9
1598 jbe L(ll_cache_copy_bwd_start)
1599L(memmove_is_memcpy_bwd):
1600#endif
     /* %rcx = min (length, %rcx); %rdx = length - %rcx.  */
1601 cmp %rcx, %rdx
1602 ja L(bigger)
1603 mov %rdx, %rcx
1604L(bigger):
1605 sub %rcx, %rdx
     /* At most 0x1000 bytes beyond %rcx: use ordinary stores for
        everything.  */
1606 cmp $0x1000, %rdx
1607 jbe L(ll_cache_copy)
1608
     /* Excess up to 8 * %rcx: two steps (%rdx bytes non-temporal, then
        %rcx bytes ordinary).  Larger: everything non-temporal, with
        %rcx cleared so the second step is skipped.  */
1609 mov %rcx, %r9
1610 shl $3, %r9
1611 cmp %r9, %rdx
1612 jbe L(2steps_copy)
1613 add %rcx, %rdx
1614 xor %rcx, %rcx
1615L(2steps_copy):
     /* Pre-bias the count by one iteration; the loop exits on the
        borrow from its own sub.  */
1616 sub $0x80, %rdx
1617L(gobble_mem_bwd_loop):
1618 sub $0x80, %rdx
1619 prefetcht0 -0x200(%rsi)
1620 prefetcht0 -0x300(%rsi)
     /* %xmm0 is deliberately not used in the loops: it still holds the
        saved last 16 bytes.  */
1621 movdqu -0x10(%rsi), %xmm1
1622 movdqu -0x20(%rsi), %xmm2
1623 movdqu -0x30(%rsi), %xmm3
1624 movdqu -0x40(%rsi), %xmm4
1625 movdqu -0x50(%rsi), %xmm5
1626 movdqu -0x60(%rsi), %xmm6
1627 movdqu -0x70(%rsi), %xmm7
1628 movdqu -0x80(%rsi), %xmm8
     /* All eight loads are fenced before the non-temporal stores.  */
1629 lfence
1630 movntdq %xmm1, -0x10(%rdi)
1631 movntdq %xmm2, -0x20(%rdi)
1632 movntdq %xmm3, -0x30(%rdi)
1633 movntdq %xmm4, -0x40(%rdi)
1634 movntdq %xmm5, -0x50(%rdi)
1635 movntdq %xmm6, -0x60(%rdi)
1636 movntdq %xmm7, -0x70(%rdi)
1637 movntdq %xmm8, -0x80(%rdi)
1638 lea -0x80(%rsi), %rsi
1639 lea -0x80(%rdi), %rdi
     /* CF still comes from the sub at the loop top.  */
1640 jae L(gobble_mem_bwd_loop)
     /* Fence the non-temporal stores before continuing.  */
1641 sfence
     /* Skip the ordinary-store step when %rcx holds less than one full
        iteration.  */
1642 cmp $0x80, %rcx
1643 jb L(gobble_mem_bwd_end)
1644 add $0x80, %rdx
1645L(ll_cache_copy):
1646 add %rcx, %rdx
1647L(ll_cache_copy_bwd_start):
1648 sub $0x80, %rdx
1649L(gobble_ll_loop):
1650 prefetchnta -0x1c0(%rsi)
1651 prefetchnta -0x280(%rsi)
1652 prefetchnta -0x1c0(%rdi)
1653 prefetchnta -0x280(%rdi)
1654 sub $0x80, %rdx
1655 movdqu -0x10(%rsi), %xmm1
1656 movdqu -0x20(%rsi), %xmm2
1657 movdqu -0x30(%rsi), %xmm3
1658 movdqu -0x40(%rsi), %xmm4
1659 movdqu -0x50(%rsi), %xmm5
1660 movdqu -0x60(%rsi), %xmm6
1661 movdqu -0x70(%rsi), %xmm7
1662 movdqu -0x80(%rsi), %xmm8
     /* All loads of the iteration complete before the first store.  */
1663 movdqa %xmm1, -0x10(%rdi)
1664 movdqa %xmm2, -0x20(%rdi)
1665 movdqa %xmm3, -0x30(%rdi)
1666 movdqa %xmm4, -0x40(%rdi)
1667 movdqa %xmm5, -0x50(%rdi)
1668 movdqa %xmm6, -0x60(%rdi)
1669 movdqa %xmm7, -0x70(%rdi)
1670 movdqa %xmm8, -0x80(%rdi)
1671 lea -0x80(%rsi), %rsi
1672 lea -0x80(%rdi), %rdi
1673 jae L(gobble_ll_loop)
1674L(gobble_mem_bwd_end):
     /* Store the saved last 16 bytes, recover the residual count, move
        both pointers down to the start of the region and dispatch.  */
1675 movdqu %xmm0, (%r8)
1676 add $0x80, %rdx
1677 sub %rdx, %rsi
1678 sub %rdx, %rdi
1679 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1680
/* Forward tail chain for lengths 128, 112, ..., 16 and 0.  %rsi and
   %rdi mark the end of the region (every offset is negative).  Each
   entry copies one 16-byte chunk with unaligned moves through %xmm0 and
   falls through to the entry 16 bytes shorter; the 0-byte entry just
   returns.  NOTE(review): presumably entered through
   L(table_144_bytes_fwd), which is not visible here - confirm.  */
1681 .p2align 4
1682L(fwd_write_128bytes):
1683 lddqu -128(%rsi), %xmm0
1684 movdqu %xmm0, -128(%rdi)
1685L(fwd_write_112bytes):
1686 lddqu -112(%rsi), %xmm0
1687 movdqu %xmm0, -112(%rdi)
1688L(fwd_write_96bytes):
1689 lddqu -96(%rsi), %xmm0
1690 movdqu %xmm0, -96(%rdi)
1691L(fwd_write_80bytes):
1692 lddqu -80(%rsi), %xmm0
1693 movdqu %xmm0, -80(%rdi)
1694L(fwd_write_64bytes):
1695 lddqu -64(%rsi), %xmm0
1696 movdqu %xmm0, -64(%rdi)
1697L(fwd_write_48bytes):
1698 lddqu -48(%rsi), %xmm0
1699 movdqu %xmm0, -48(%rdi)
1700L(fwd_write_32bytes):
1701 lddqu -32(%rsi), %xmm0
1702 movdqu %xmm0, -32(%rdi)
1703L(fwd_write_16bytes):
1704 lddqu -16(%rsi), %xmm0
1705 movdqu %xmm0, -16(%rdi)
1706L(fwd_write_0bytes):
1707 ret
1708
1709
/* Forward tail chain for lengths 143, 127, ..., 31.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1710 .p2align 4
1711L(fwd_write_143bytes):
1712 lddqu -143(%rsi), %xmm0
1713 movdqu %xmm0, -143(%rdi)
1714L(fwd_write_127bytes):
1715 lddqu -127(%rsi), %xmm0
1716 movdqu %xmm0, -127(%rdi)
1717L(fwd_write_111bytes):
1718 lddqu -111(%rsi), %xmm0
1719 movdqu %xmm0, -111(%rdi)
1720L(fwd_write_95bytes):
1721 lddqu -95(%rsi), %xmm0
1722 movdqu %xmm0, -95(%rdi)
1723L(fwd_write_79bytes):
1724 lddqu -79(%rsi), %xmm0
1725 movdqu %xmm0, -79(%rdi)
1726L(fwd_write_63bytes):
1727 lddqu -63(%rsi), %xmm0
1728 movdqu %xmm0, -63(%rdi)
1729L(fwd_write_47bytes):
1730 lddqu -47(%rsi), %xmm0
1731 movdqu %xmm0, -47(%rdi)
1732L(fwd_write_31bytes):
     /* Final 31 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1733 lddqu -31(%rsi), %xmm0
1734 lddqu -16(%rsi), %xmm1
1735 movdqu %xmm0, -31(%rdi)
1736 movdqu %xmm1, -16(%rdi)
1737 ret
1738
/* Copy the 15 bytes ending at %rsi to the 15 bytes ending at %rdi with
   two overlapping 8-byte moves; both loads are done before the stores.
   Uses %rdx and %rcx as scratch.  */
1739 .p2align 4
1740L(fwd_write_15bytes):
1741 mov -15(%rsi), %rdx
1742 mov -8(%rsi), %rcx
1743 mov %rdx, -15(%rdi)
1744 mov %rcx, -8(%rdi)
1745 ret
1746
/* Forward tail chain for lengths 142, 126, ..., 30.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1747 .p2align 4
1748L(fwd_write_142bytes):
1749 lddqu -142(%rsi), %xmm0
1750 movdqu %xmm0, -142(%rdi)
1751L(fwd_write_126bytes):
1752 lddqu -126(%rsi), %xmm0
1753 movdqu %xmm0, -126(%rdi)
1754L(fwd_write_110bytes):
1755 lddqu -110(%rsi), %xmm0
1756 movdqu %xmm0, -110(%rdi)
1757L(fwd_write_94bytes):
1758 lddqu -94(%rsi), %xmm0
1759 movdqu %xmm0, -94(%rdi)
1760L(fwd_write_78bytes):
1761 lddqu -78(%rsi), %xmm0
1762 movdqu %xmm0, -78(%rdi)
1763L(fwd_write_62bytes):
1764 lddqu -62(%rsi), %xmm0
1765 movdqu %xmm0, -62(%rdi)
1766L(fwd_write_46bytes):
1767 lddqu -46(%rsi), %xmm0
1768 movdqu %xmm0, -46(%rdi)
1769L(fwd_write_30bytes):
     /* Final 30 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1770 lddqu -30(%rsi), %xmm0
1771 lddqu -16(%rsi), %xmm1
1772 movdqu %xmm0, -30(%rdi)
1773 movdqu %xmm1, -16(%rdi)
1774 ret
1775
/* Copy the 14 bytes ending at %rsi to the 14 bytes ending at %rdi with
   two overlapping 8-byte moves; both loads are done before the stores.
   Uses %rdx and %rcx as scratch.  */
1776 .p2align 4
1777L(fwd_write_14bytes):
1778 mov -14(%rsi), %rdx
1779 mov -8(%rsi), %rcx
1780 mov %rdx, -14(%rdi)
1781 mov %rcx, -8(%rdi)
1782 ret
1783
/* Forward tail chain for lengths 141, 125, ..., 29.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1784 .p2align 4
1785L(fwd_write_141bytes):
1786 lddqu -141(%rsi), %xmm0
1787 movdqu %xmm0, -141(%rdi)
1788L(fwd_write_125bytes):
1789 lddqu -125(%rsi), %xmm0
1790 movdqu %xmm0, -125(%rdi)
1791L(fwd_write_109bytes):
1792 lddqu -109(%rsi), %xmm0
1793 movdqu %xmm0, -109(%rdi)
1794L(fwd_write_93bytes):
1795 lddqu -93(%rsi), %xmm0
1796 movdqu %xmm0, -93(%rdi)
1797L(fwd_write_77bytes):
1798 lddqu -77(%rsi), %xmm0
1799 movdqu %xmm0, -77(%rdi)
1800L(fwd_write_61bytes):
1801 lddqu -61(%rsi), %xmm0
1802 movdqu %xmm0, -61(%rdi)
1803L(fwd_write_45bytes):
1804 lddqu -45(%rsi), %xmm0
1805 movdqu %xmm0, -45(%rdi)
1806L(fwd_write_29bytes):
     /* Final 29 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1807 lddqu -29(%rsi), %xmm0
1808 lddqu -16(%rsi), %xmm1
1809 movdqu %xmm0, -29(%rdi)
1810 movdqu %xmm1, -16(%rdi)
1811 ret
1812
/* Copy the 13 bytes ending at %rsi to the 13 bytes ending at %rdi with
   two overlapping 8-byte moves; both loads are done before the stores.
   Uses %rdx and %rcx as scratch.  */
1813 .p2align 4
1814L(fwd_write_13bytes):
1815 mov -13(%rsi), %rdx
1816 mov -8(%rsi), %rcx
1817 mov %rdx, -13(%rdi)
1818 mov %rcx, -8(%rdi)
1819 ret
1820
/* Forward tail chain for lengths 140, 124, ..., 28.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1821 .p2align 4
1822L(fwd_write_140bytes):
1823 lddqu -140(%rsi), %xmm0
1824 movdqu %xmm0, -140(%rdi)
1825L(fwd_write_124bytes):
1826 lddqu -124(%rsi), %xmm0
1827 movdqu %xmm0, -124(%rdi)
1828L(fwd_write_108bytes):
1829 lddqu -108(%rsi), %xmm0
1830 movdqu %xmm0, -108(%rdi)
1831L(fwd_write_92bytes):
1832 lddqu -92(%rsi), %xmm0
1833 movdqu %xmm0, -92(%rdi)
1834L(fwd_write_76bytes):
1835 lddqu -76(%rsi), %xmm0
1836 movdqu %xmm0, -76(%rdi)
1837L(fwd_write_60bytes):
1838 lddqu -60(%rsi), %xmm0
1839 movdqu %xmm0, -60(%rdi)
1840L(fwd_write_44bytes):
1841 lddqu -44(%rsi), %xmm0
1842 movdqu %xmm0, -44(%rdi)
1843L(fwd_write_28bytes):
     /* Final 28 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1844 lddqu -28(%rsi), %xmm0
1845 lddqu -16(%rsi), %xmm1
1846 movdqu %xmm0, -28(%rdi)
1847 movdqu %xmm1, -16(%rdi)
1848 ret
1849
/* Copy the 12 bytes ending at %rsi to the 12 bytes ending at %rdi with
   one 8-byte and one 4-byte move; both loads are done before the
   stores.  Uses %rdx and %ecx as scratch.  */
1850 .p2align 4
1851L(fwd_write_12bytes):
1852 mov -12(%rsi), %rdx
1853 mov -4(%rsi), %ecx
1854 mov %rdx, -12(%rdi)
1855 mov %ecx, -4(%rdi)
1856 ret
1857
/* Forward tail chain for lengths 139, 123, ..., 27.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1858 .p2align 4
1859L(fwd_write_139bytes):
1860 lddqu -139(%rsi), %xmm0
1861 movdqu %xmm0, -139(%rdi)
1862L(fwd_write_123bytes):
1863 lddqu -123(%rsi), %xmm0
1864 movdqu %xmm0, -123(%rdi)
1865L(fwd_write_107bytes):
1866 lddqu -107(%rsi), %xmm0
1867 movdqu %xmm0, -107(%rdi)
1868L(fwd_write_91bytes):
1869 lddqu -91(%rsi), %xmm0
1870 movdqu %xmm0, -91(%rdi)
1871L(fwd_write_75bytes):
1872 lddqu -75(%rsi), %xmm0
1873 movdqu %xmm0, -75(%rdi)
1874L(fwd_write_59bytes):
1875 lddqu -59(%rsi), %xmm0
1876 movdqu %xmm0, -59(%rdi)
1877L(fwd_write_43bytes):
1878 lddqu -43(%rsi), %xmm0
1879 movdqu %xmm0, -43(%rdi)
1880L(fwd_write_27bytes):
     /* Final 27 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1881 lddqu -27(%rsi), %xmm0
1882 lddqu -16(%rsi), %xmm1
1883 movdqu %xmm0, -27(%rdi)
1884 movdqu %xmm1, -16(%rdi)
1885 ret
1886
/* Copy the 11 bytes ending at %rsi to the 11 bytes ending at %rdi with
   an 8-byte move and an overlapping 4-byte move; both loads are done
   before the stores.  Uses %rdx and %ecx as scratch.  */
1887 .p2align 4
1888L(fwd_write_11bytes):
1889 mov -11(%rsi), %rdx
1890 mov -4(%rsi), %ecx
1891 mov %rdx, -11(%rdi)
1892 mov %ecx, -4(%rdi)
1893 ret
1894
/* Forward tail chain for lengths 138, 122, ..., 26.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1895 .p2align 4
1896L(fwd_write_138bytes):
1897 lddqu -138(%rsi), %xmm0
1898 movdqu %xmm0, -138(%rdi)
1899L(fwd_write_122bytes):
1900 lddqu -122(%rsi), %xmm0
1901 movdqu %xmm0, -122(%rdi)
1902L(fwd_write_106bytes):
1903 lddqu -106(%rsi), %xmm0
1904 movdqu %xmm0, -106(%rdi)
1905L(fwd_write_90bytes):
1906 lddqu -90(%rsi), %xmm0
1907 movdqu %xmm0, -90(%rdi)
1908L(fwd_write_74bytes):
1909 lddqu -74(%rsi), %xmm0
1910 movdqu %xmm0, -74(%rdi)
1911L(fwd_write_58bytes):
1912 lddqu -58(%rsi), %xmm0
1913 movdqu %xmm0, -58(%rdi)
1914L(fwd_write_42bytes):
1915 lddqu -42(%rsi), %xmm0
1916 movdqu %xmm0, -42(%rdi)
1917L(fwd_write_26bytes):
     /* Final 26 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1918 lddqu -26(%rsi), %xmm0
1919 lddqu -16(%rsi), %xmm1
1920 movdqu %xmm0, -26(%rdi)
1921 movdqu %xmm1, -16(%rdi)
1922 ret
1923
/* Copy the 10 bytes ending at %rsi to the 10 bytes ending at %rdi with
   an 8-byte move and an overlapping 4-byte move; both loads are done
   before the stores.  Uses %rdx and %ecx as scratch.  */
1924 .p2align 4
1925L(fwd_write_10bytes):
1926 mov -10(%rsi), %rdx
1927 mov -4(%rsi), %ecx
1928 mov %rdx, -10(%rdi)
1929 mov %ecx, -4(%rdi)
1930 ret
1931
/* Forward tail chain for lengths 137, 121, ..., 25.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1932 .p2align 4
1933L(fwd_write_137bytes):
1934 lddqu -137(%rsi), %xmm0
1935 movdqu %xmm0, -137(%rdi)
1936L(fwd_write_121bytes):
1937 lddqu -121(%rsi), %xmm0
1938 movdqu %xmm0, -121(%rdi)
1939L(fwd_write_105bytes):
1940 lddqu -105(%rsi), %xmm0
1941 movdqu %xmm0, -105(%rdi)
1942L(fwd_write_89bytes):
1943 lddqu -89(%rsi), %xmm0
1944 movdqu %xmm0, -89(%rdi)
1945L(fwd_write_73bytes):
1946 lddqu -73(%rsi), %xmm0
1947 movdqu %xmm0, -73(%rdi)
1948L(fwd_write_57bytes):
1949 lddqu -57(%rsi), %xmm0
1950 movdqu %xmm0, -57(%rdi)
1951L(fwd_write_41bytes):
1952 lddqu -41(%rsi), %xmm0
1953 movdqu %xmm0, -41(%rdi)
1954L(fwd_write_25bytes):
     /* Final 25 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1955 lddqu -25(%rsi), %xmm0
1956 lddqu -16(%rsi), %xmm1
1957 movdqu %xmm0, -25(%rdi)
1958 movdqu %xmm1, -16(%rdi)
1959 ret
1960
/* Copy the 9 bytes ending at %rsi to the 9 bytes ending at %rdi with an
   8-byte move and an overlapping 4-byte move; both loads are done
   before the stores.  Uses %rdx and %ecx as scratch.  */
1961 .p2align 4
1962L(fwd_write_9bytes):
1963 mov -9(%rsi), %rdx
1964 mov -4(%rsi), %ecx
1965 mov %rdx, -9(%rdi)
1966 mov %ecx, -4(%rdi)
1967 ret
1968
/* Forward tail chain for lengths 136, 120, ..., 24.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
1969 .p2align 4
1970L(fwd_write_136bytes):
1971 lddqu -136(%rsi), %xmm0
1972 movdqu %xmm0, -136(%rdi)
1973L(fwd_write_120bytes):
1974 lddqu -120(%rsi), %xmm0
1975 movdqu %xmm0, -120(%rdi)
1976L(fwd_write_104bytes):
1977 lddqu -104(%rsi), %xmm0
1978 movdqu %xmm0, -104(%rdi)
1979L(fwd_write_88bytes):
1980 lddqu -88(%rsi), %xmm0
1981 movdqu %xmm0, -88(%rdi)
1982L(fwd_write_72bytes):
1983 lddqu -72(%rsi), %xmm0
1984 movdqu %xmm0, -72(%rdi)
1985L(fwd_write_56bytes):
1986 lddqu -56(%rsi), %xmm0
1987 movdqu %xmm0, -56(%rdi)
1988L(fwd_write_40bytes):
1989 lddqu -40(%rsi), %xmm0
1990 movdqu %xmm0, -40(%rdi)
1991L(fwd_write_24bytes):
     /* Final 24 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
1992 lddqu -24(%rsi), %xmm0
1993 lddqu -16(%rsi), %xmm1
1994 movdqu %xmm0, -24(%rdi)
1995 movdqu %xmm1, -16(%rdi)
1996 ret
1997
/* Copy the 8 bytes ending at %rsi to the 8 bytes ending at %rdi with a
   single 8-byte move through %rdx.  */
1998 .p2align 4
1999L(fwd_write_8bytes):
2000 mov -8(%rsi), %rdx
2001 mov %rdx, -8(%rdi)
2002 ret
2003
/* Forward tail chain for lengths 135, 119, ..., 23.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2004 .p2align 4
2005L(fwd_write_135bytes):
2006 lddqu -135(%rsi), %xmm0
2007 movdqu %xmm0, -135(%rdi)
2008L(fwd_write_119bytes):
2009 lddqu -119(%rsi), %xmm0
2010 movdqu %xmm0, -119(%rdi)
2011L(fwd_write_103bytes):
2012 lddqu -103(%rsi), %xmm0
2013 movdqu %xmm0, -103(%rdi)
2014L(fwd_write_87bytes):
2015 lddqu -87(%rsi), %xmm0
2016 movdqu %xmm0, -87(%rdi)
2017L(fwd_write_71bytes):
2018 lddqu -71(%rsi), %xmm0
2019 movdqu %xmm0, -71(%rdi)
2020L(fwd_write_55bytes):
2021 lddqu -55(%rsi), %xmm0
2022 movdqu %xmm0, -55(%rdi)
2023L(fwd_write_39bytes):
2024 lddqu -39(%rsi), %xmm0
2025 movdqu %xmm0, -39(%rdi)
2026L(fwd_write_23bytes):
     /* Final 23 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2027 lddqu -23(%rsi), %xmm0
2028 lddqu -16(%rsi), %xmm1
2029 movdqu %xmm0, -23(%rdi)
2030 movdqu %xmm1, -16(%rdi)
2031 ret
2032
/* Copy the 7 bytes ending at %rsi to the 7 bytes ending at %rdi with
   two overlapping 4-byte moves; both loads are done before the stores.
   Uses %edx and %ecx as scratch.  */
2033 .p2align 4
2034L(fwd_write_7bytes):
2035 mov -7(%rsi), %edx
2036 mov -4(%rsi), %ecx
2037 mov %edx, -7(%rdi)
2038 mov %ecx, -4(%rdi)
2039 ret
2040
/* Forward tail chain for lengths 134, 118, ..., 22.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2041 .p2align 4
2042L(fwd_write_134bytes):
2043 lddqu -134(%rsi), %xmm0
2044 movdqu %xmm0, -134(%rdi)
2045L(fwd_write_118bytes):
2046 lddqu -118(%rsi), %xmm0
2047 movdqu %xmm0, -118(%rdi)
2048L(fwd_write_102bytes):
2049 lddqu -102(%rsi), %xmm0
2050 movdqu %xmm0, -102(%rdi)
2051L(fwd_write_86bytes):
2052 lddqu -86(%rsi), %xmm0
2053 movdqu %xmm0, -86(%rdi)
2054L(fwd_write_70bytes):
2055 lddqu -70(%rsi), %xmm0
2056 movdqu %xmm0, -70(%rdi)
2057L(fwd_write_54bytes):
2058 lddqu -54(%rsi), %xmm0
2059 movdqu %xmm0, -54(%rdi)
2060L(fwd_write_38bytes):
2061 lddqu -38(%rsi), %xmm0
2062 movdqu %xmm0, -38(%rdi)
2063L(fwd_write_22bytes):
     /* Final 22 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2064 lddqu -22(%rsi), %xmm0
2065 lddqu -16(%rsi), %xmm1
2066 movdqu %xmm0, -22(%rdi)
2067 movdqu %xmm1, -16(%rdi)
2068 ret
2069
/* Copy the 6 bytes ending at %rsi to the 6 bytes ending at %rdi with
   two overlapping 4-byte moves; both loads are done before the stores.
   Uses %edx and %ecx as scratch.  */
2070 .p2align 4
2071L(fwd_write_6bytes):
2072 mov -6(%rsi), %edx
2073 mov -4(%rsi), %ecx
2074 mov %edx, -6(%rdi)
2075 mov %ecx, -4(%rdi)
2076 ret
2077
/* Forward tail chain for lengths 133, 117, ..., 21.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2078 .p2align 4
2079L(fwd_write_133bytes):
2080 lddqu -133(%rsi), %xmm0
2081 movdqu %xmm0, -133(%rdi)
2082L(fwd_write_117bytes):
2083 lddqu -117(%rsi), %xmm0
2084 movdqu %xmm0, -117(%rdi)
2085L(fwd_write_101bytes):
2086 lddqu -101(%rsi), %xmm0
2087 movdqu %xmm0, -101(%rdi)
2088L(fwd_write_85bytes):
2089 lddqu -85(%rsi), %xmm0
2090 movdqu %xmm0, -85(%rdi)
2091L(fwd_write_69bytes):
2092 lddqu -69(%rsi), %xmm0
2093 movdqu %xmm0, -69(%rdi)
2094L(fwd_write_53bytes):
2095 lddqu -53(%rsi), %xmm0
2096 movdqu %xmm0, -53(%rdi)
2097L(fwd_write_37bytes):
2098 lddqu -37(%rsi), %xmm0
2099 movdqu %xmm0, -37(%rdi)
2100L(fwd_write_21bytes):
     /* Final 21 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2101 lddqu -21(%rsi), %xmm0
2102 lddqu -16(%rsi), %xmm1
2103 movdqu %xmm0, -21(%rdi)
2104 movdqu %xmm1, -16(%rdi)
2105 ret
2106
/* Copy the 5 bytes ending at %rsi to the 5 bytes ending at %rdi with
   two overlapping 4-byte moves; both loads are done before the stores.
   Uses %edx and %ecx as scratch.  */
2107 .p2align 4
2108L(fwd_write_5bytes):
2109 mov -5(%rsi), %edx
2110 mov -4(%rsi), %ecx
2111 mov %edx, -5(%rdi)
2112 mov %ecx, -4(%rdi)
2113 ret
2114
/* Forward tail chain for lengths 132, 116, ..., 20.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2115 .p2align 4
2116L(fwd_write_132bytes):
2117 lddqu -132(%rsi), %xmm0
2118 movdqu %xmm0, -132(%rdi)
2119L(fwd_write_116bytes):
2120 lddqu -116(%rsi), %xmm0
2121 movdqu %xmm0, -116(%rdi)
2122L(fwd_write_100bytes):
2123 lddqu -100(%rsi), %xmm0
2124 movdqu %xmm0, -100(%rdi)
2125L(fwd_write_84bytes):
2126 lddqu -84(%rsi), %xmm0
2127 movdqu %xmm0, -84(%rdi)
2128L(fwd_write_68bytes):
2129 lddqu -68(%rsi), %xmm0
2130 movdqu %xmm0, -68(%rdi)
2131L(fwd_write_52bytes):
2132 lddqu -52(%rsi), %xmm0
2133 movdqu %xmm0, -52(%rdi)
2134L(fwd_write_36bytes):
2135 lddqu -36(%rsi), %xmm0
2136 movdqu %xmm0, -36(%rdi)
2137L(fwd_write_20bytes):
     /* Final 20 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2138 lddqu -20(%rsi), %xmm0
2139 lddqu -16(%rsi), %xmm1
2140 movdqu %xmm0, -20(%rdi)
2141 movdqu %xmm1, -16(%rdi)
2142 ret
2143
/* Copy the 4 bytes ending at %rsi to the 4 bytes ending at %rdi with a
   single 4-byte move through %edx.  */
2144 .p2align 4
2145L(fwd_write_4bytes):
2146 mov -4(%rsi), %edx
2147 mov %edx, -4(%rdi)
2148 ret
2149
/* Forward tail chain for lengths 131, 115, ..., 19.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2150 .p2align 4
2151L(fwd_write_131bytes):
2152 lddqu -131(%rsi), %xmm0
2153 movdqu %xmm0, -131(%rdi)
2154L(fwd_write_115bytes):
2155 lddqu -115(%rsi), %xmm0
2156 movdqu %xmm0, -115(%rdi)
2157L(fwd_write_99bytes):
2158 lddqu -99(%rsi), %xmm0
2159 movdqu %xmm0, -99(%rdi)
2160L(fwd_write_83bytes):
2161 lddqu -83(%rsi), %xmm0
2162 movdqu %xmm0, -83(%rdi)
2163L(fwd_write_67bytes):
2164 lddqu -67(%rsi), %xmm0
2165 movdqu %xmm0, -67(%rdi)
2166L(fwd_write_51bytes):
2167 lddqu -51(%rsi), %xmm0
2168 movdqu %xmm0, -51(%rdi)
2169L(fwd_write_35bytes):
2170 lddqu -35(%rsi), %xmm0
2171 movdqu %xmm0, -35(%rdi)
2172L(fwd_write_19bytes):
     /* Final 19 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2173 lddqu -19(%rsi), %xmm0
2174 lddqu -16(%rsi), %xmm1
2175 movdqu %xmm0, -19(%rdi)
2176 movdqu %xmm1, -16(%rdi)
2177 ret
2178
/* Copy the 3 bytes ending at %rsi to the 3 bytes ending at %rdi with
   two overlapping 2-byte moves; both loads are done before the stores.
   The loads are zero-extending (movzwl), matching
   L(fwd_write_2bytes) and L(fwd_write_1bytes), so they write the whole
   of %edx/%ecx instead of merging into the low 16 bits of whatever the
   registers held before.  The bytes stored are unchanged.  Uses %edx
   and %ecx as scratch.  */
2179 .p2align 4
2180L(fwd_write_3bytes):
2181 movzwl -3(%rsi), %edx
2182 movzwl -2(%rsi), %ecx
2183 mov %dx, -3(%rdi)
2184 mov %cx, -2(%rdi)
2185 ret
2186
/* Forward tail chain for lengths 130, 114, ..., 18.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2187 .p2align 4
2188L(fwd_write_130bytes):
2189 lddqu -130(%rsi), %xmm0
2190 movdqu %xmm0, -130(%rdi)
2191L(fwd_write_114bytes):
2192 lddqu -114(%rsi), %xmm0
2193 movdqu %xmm0, -114(%rdi)
2194L(fwd_write_98bytes):
2195 lddqu -98(%rsi), %xmm0
2196 movdqu %xmm0, -98(%rdi)
2197L(fwd_write_82bytes):
2198 lddqu -82(%rsi), %xmm0
2199 movdqu %xmm0, -82(%rdi)
2200L(fwd_write_66bytes):
2201 lddqu -66(%rsi), %xmm0
2202 movdqu %xmm0, -66(%rdi)
2203L(fwd_write_50bytes):
2204 lddqu -50(%rsi), %xmm0
2205 movdqu %xmm0, -50(%rdi)
2206L(fwd_write_34bytes):
2207 lddqu -34(%rsi), %xmm0
2208 movdqu %xmm0, -34(%rdi)
2209L(fwd_write_18bytes):
     /* Final 18 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2210 lddqu -18(%rsi), %xmm0
2211 lddqu -16(%rsi), %xmm1
2212 movdqu %xmm0, -18(%rdi)
2213 movdqu %xmm1, -16(%rdi)
2214 ret
2215
/* Copy the 2 bytes ending at %rsi to the 2 bytes ending at %rdi.  The
   load is zero-extended into %edx; only the low 16 bits are stored.  */
2216 .p2align 4
2217L(fwd_write_2bytes):
2218 movzwl -2(%rsi), %edx
2219 mov %dx, -2(%rdi)
2220 ret
2221
/* Forward tail chain for lengths 129, 113, ..., 17.  %rsi and %rdi mark
   the end of the region (every offset is negative).  Each entry copies
   one 16-byte chunk through %xmm0 and falls through to the entry 16
   bytes shorter.  */
2222 .p2align 4
2223L(fwd_write_129bytes):
2224 lddqu -129(%rsi), %xmm0
2225 movdqu %xmm0, -129(%rdi)
2226L(fwd_write_113bytes):
2227 lddqu -113(%rsi), %xmm0
2228 movdqu %xmm0, -113(%rdi)
2229L(fwd_write_97bytes):
2230 lddqu -97(%rsi), %xmm0
2231 movdqu %xmm0, -97(%rdi)
2232L(fwd_write_81bytes):
2233 lddqu -81(%rsi), %xmm0
2234 movdqu %xmm0, -81(%rdi)
2235L(fwd_write_65bytes):
2236 lddqu -65(%rsi), %xmm0
2237 movdqu %xmm0, -65(%rdi)
2238L(fwd_write_49bytes):
2239 lddqu -49(%rsi), %xmm0
2240 movdqu %xmm0, -49(%rdi)
2241L(fwd_write_33bytes):
2242 lddqu -33(%rsi), %xmm0
2243 movdqu %xmm0, -33(%rdi)
2244L(fwd_write_17bytes):
     /* Final 17 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2245 lddqu -17(%rsi), %xmm0
2246 lddqu -16(%rsi), %xmm1
2247 movdqu %xmm0, -17(%rdi)
2248 movdqu %xmm1, -16(%rdi)
2249 ret
2250
/* Copy the single byte just below %rsi to the byte just below %rdi.
   The load is zero-extended into %edx; only %dl is stored.  */
2251 .p2align 4
2252L(fwd_write_1bytes):
2253 movzbl -1(%rsi), %edx
2254 mov %dl, -1(%rdi)
2255 ret
2256
/* Backward tail chain for lengths 128, 112, ..., 16 and 0.  %rsi and
   %rdi mark the start of the region (every offset is non-negative).
   Each entry copies the highest remaining 16-byte chunk with unaligned
   moves through %xmm0 and falls through to the entry 16 bytes shorter;
   the 0-byte entry just returns.  NOTE(review): presumably entered
   through L(table_144_bytes_bwd), which is not visible here -
   confirm.  */
2257 .p2align 4
2258L(bwd_write_128bytes):
2259 lddqu 112(%rsi), %xmm0
2260 movdqu %xmm0, 112(%rdi)
2261L(bwd_write_112bytes):
2262 lddqu 96(%rsi), %xmm0
2263 movdqu %xmm0, 96(%rdi)
2264L(bwd_write_96bytes):
2265 lddqu 80(%rsi), %xmm0
2266 movdqu %xmm0, 80(%rdi)
2267L(bwd_write_80bytes):
2268 lddqu 64(%rsi), %xmm0
2269 movdqu %xmm0, 64(%rdi)
2270L(bwd_write_64bytes):
2271 lddqu 48(%rsi), %xmm0
2272 movdqu %xmm0, 48(%rdi)
2273L(bwd_write_48bytes):
2274 lddqu 32(%rsi), %xmm0
2275 movdqu %xmm0, 32(%rdi)
2276L(bwd_write_32bytes):
2277 lddqu 16(%rsi), %xmm0
2278 movdqu %xmm0, 16(%rdi)
2279L(bwd_write_16bytes):
2280 lddqu (%rsi), %xmm0
2281 movdqu %xmm0, (%rdi)
2282L(bwd_write_0bytes):
2283 ret
2284
/* Backward tail chain for lengths 143, 127, ..., 31.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2285 .p2align 4
2286L(bwd_write_143bytes):
2287 lddqu 127(%rsi), %xmm0
2288 movdqu %xmm0, 127(%rdi)
2289L(bwd_write_127bytes):
2290 lddqu 111(%rsi), %xmm0
2291 movdqu %xmm0, 111(%rdi)
2292L(bwd_write_111bytes):
2293 lddqu 95(%rsi), %xmm0
2294 movdqu %xmm0, 95(%rdi)
2295L(bwd_write_95bytes):
2296 lddqu 79(%rsi), %xmm0
2297 movdqu %xmm0, 79(%rdi)
2298L(bwd_write_79bytes):
2299 lddqu 63(%rsi), %xmm0
2300 movdqu %xmm0, 63(%rdi)
2301L(bwd_write_63bytes):
2302 lddqu 47(%rsi), %xmm0
2303 movdqu %xmm0, 47(%rdi)
2304L(bwd_write_47bytes):
2305 lddqu 31(%rsi), %xmm0
2306 movdqu %xmm0, 31(%rdi)
2307L(bwd_write_31bytes):
     /* First 31 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2308 lddqu 15(%rsi), %xmm0
2309 lddqu (%rsi), %xmm1
2310 movdqu %xmm0, 15(%rdi)
2311 movdqu %xmm1, (%rdi)
2312 ret
2313
2314
/* Copy the 15 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2315 .p2align 4
2316L(bwd_write_15bytes):
2317 mov 7(%rsi), %rdx
2318 mov (%rsi), %rcx
2319 mov %rdx, 7(%rdi)
2320 mov %rcx, (%rdi)
2321 ret
2322
/* Backward tail chain for lengths 142, 126, ..., 30.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2323 .p2align 4
2324L(bwd_write_142bytes):
2325 lddqu 126(%rsi), %xmm0
2326 movdqu %xmm0, 126(%rdi)
2327L(bwd_write_126bytes):
2328 lddqu 110(%rsi), %xmm0
2329 movdqu %xmm0, 110(%rdi)
2330L(bwd_write_110bytes):
2331 lddqu 94(%rsi), %xmm0
2332 movdqu %xmm0, 94(%rdi)
2333L(bwd_write_94bytes):
2334 lddqu 78(%rsi), %xmm0
2335 movdqu %xmm0, 78(%rdi)
2336L(bwd_write_78bytes):
2337 lddqu 62(%rsi), %xmm0
2338 movdqu %xmm0, 62(%rdi)
2339L(bwd_write_62bytes):
2340 lddqu 46(%rsi), %xmm0
2341 movdqu %xmm0, 46(%rdi)
2342L(bwd_write_46bytes):
2343 lddqu 30(%rsi), %xmm0
2344 movdqu %xmm0, 30(%rdi)
2345L(bwd_write_30bytes):
     /* First 30 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2346 lddqu 14(%rsi), %xmm0
2347 lddqu (%rsi), %xmm1
2348 movdqu %xmm0, 14(%rdi)
2349 movdqu %xmm1, (%rdi)
2350 ret
2351
/* Copy the 14 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2352 .p2align 4
2353L(bwd_write_14bytes):
2354 mov 6(%rsi), %rdx
2355 mov (%rsi), %rcx
2356 mov %rdx, 6(%rdi)
2357 mov %rcx, (%rdi)
2358 ret
2359
/* Backward tail chain for lengths 141, 125, ..., 29.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2360 .p2align 4
2361L(bwd_write_141bytes):
2362 lddqu 125(%rsi), %xmm0
2363 movdqu %xmm0, 125(%rdi)
2364L(bwd_write_125bytes):
2365 lddqu 109(%rsi), %xmm0
2366 movdqu %xmm0, 109(%rdi)
2367L(bwd_write_109bytes):
2368 lddqu 93(%rsi), %xmm0
2369 movdqu %xmm0, 93(%rdi)
2370L(bwd_write_93bytes):
2371 lddqu 77(%rsi), %xmm0
2372 movdqu %xmm0, 77(%rdi)
2373L(bwd_write_77bytes):
2374 lddqu 61(%rsi), %xmm0
2375 movdqu %xmm0, 61(%rdi)
2376L(bwd_write_61bytes):
2377 lddqu 45(%rsi), %xmm0
2378 movdqu %xmm0, 45(%rdi)
2379L(bwd_write_45bytes):
2380 lddqu 29(%rsi), %xmm0
2381 movdqu %xmm0, 29(%rdi)
2382L(bwd_write_29bytes):
     /* First 29 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2383 lddqu 13(%rsi), %xmm0
2384 lddqu (%rsi), %xmm1
2385 movdqu %xmm0, 13(%rdi)
2386 movdqu %xmm1, (%rdi)
2387 ret
2388
/* Copy the 13 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2389 .p2align 4
2390L(bwd_write_13bytes):
2391 mov 5(%rsi), %rdx
2392 mov (%rsi), %rcx
2393 mov %rdx, 5(%rdi)
2394 mov %rcx, (%rdi)
2395 ret
2396
/* Backward tail chain for lengths 140, 124, ..., 28.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2397 .p2align 4
2398L(bwd_write_140bytes):
2399 lddqu 124(%rsi), %xmm0
2400 movdqu %xmm0, 124(%rdi)
2401L(bwd_write_124bytes):
2402 lddqu 108(%rsi), %xmm0
2403 movdqu %xmm0, 108(%rdi)
2404L(bwd_write_108bytes):
2405 lddqu 92(%rsi), %xmm0
2406 movdqu %xmm0, 92(%rdi)
2407L(bwd_write_92bytes):
2408 lddqu 76(%rsi), %xmm0
2409 movdqu %xmm0, 76(%rdi)
2410L(bwd_write_76bytes):
2411 lddqu 60(%rsi), %xmm0
2412 movdqu %xmm0, 60(%rdi)
2413L(bwd_write_60bytes):
2414 lddqu 44(%rsi), %xmm0
2415 movdqu %xmm0, 44(%rdi)
2416L(bwd_write_44bytes):
2417 lddqu 28(%rsi), %xmm0
2418 movdqu %xmm0, 28(%rdi)
2419L(bwd_write_28bytes):
     /* First 28 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2420 lddqu 12(%rsi), %xmm0
2421 lddqu (%rsi), %xmm1
2422 movdqu %xmm0, 12(%rdi)
2423 movdqu %xmm1, (%rdi)
2424 ret
2425
/* Copy the 12 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2426 .p2align 4
2427L(bwd_write_12bytes):
2428 mov 4(%rsi), %rdx
2429 mov (%rsi), %rcx
2430 mov %rdx, 4(%rdi)
2431 mov %rcx, (%rdi)
2432 ret
2433
/* Backward tail chain for lengths 139, 123, ..., 27.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2434 .p2align 4
2435L(bwd_write_139bytes):
2436 lddqu 123(%rsi), %xmm0
2437 movdqu %xmm0, 123(%rdi)
2438L(bwd_write_123bytes):
2439 lddqu 107(%rsi), %xmm0
2440 movdqu %xmm0, 107(%rdi)
2441L(bwd_write_107bytes):
2442 lddqu 91(%rsi), %xmm0
2443 movdqu %xmm0, 91(%rdi)
2444L(bwd_write_91bytes):
2445 lddqu 75(%rsi), %xmm0
2446 movdqu %xmm0, 75(%rdi)
2447L(bwd_write_75bytes):
2448 lddqu 59(%rsi), %xmm0
2449 movdqu %xmm0, 59(%rdi)
2450L(bwd_write_59bytes):
2451 lddqu 43(%rsi), %xmm0
2452 movdqu %xmm0, 43(%rdi)
2453L(bwd_write_43bytes):
2454 lddqu 27(%rsi), %xmm0
2455 movdqu %xmm0, 27(%rdi)
2456L(bwd_write_27bytes):
     /* First 27 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2457 lddqu 11(%rsi), %xmm0
2458 lddqu (%rsi), %xmm1
2459 movdqu %xmm0, 11(%rdi)
2460 movdqu %xmm1, (%rdi)
2461 ret
2462
/* Copy the 11 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2463 .p2align 4
2464L(bwd_write_11bytes):
2465 mov 3(%rsi), %rdx
2466 mov (%rsi), %rcx
2467 mov %rdx, 3(%rdi)
2468 mov %rcx, (%rdi)
2469 ret
2470
/* Backward tail chain for lengths 138, 122, ..., 26.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2471 .p2align 4
2472L(bwd_write_138bytes):
2473 lddqu 122(%rsi), %xmm0
2474 movdqu %xmm0, 122(%rdi)
2475L(bwd_write_122bytes):
2476 lddqu 106(%rsi), %xmm0
2477 movdqu %xmm0, 106(%rdi)
2478L(bwd_write_106bytes):
2479 lddqu 90(%rsi), %xmm0
2480 movdqu %xmm0, 90(%rdi)
2481L(bwd_write_90bytes):
2482 lddqu 74(%rsi), %xmm0
2483 movdqu %xmm0, 74(%rdi)
2484L(bwd_write_74bytes):
2485 lddqu 58(%rsi), %xmm0
2486 movdqu %xmm0, 58(%rdi)
2487L(bwd_write_58bytes):
2488 lddqu 42(%rsi), %xmm0
2489 movdqu %xmm0, 42(%rdi)
2490L(bwd_write_42bytes):
2491 lddqu 26(%rsi), %xmm0
2492 movdqu %xmm0, 26(%rdi)
2493L(bwd_write_26bytes):
     /* First 26 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2494 lddqu 10(%rsi), %xmm0
2495 lddqu (%rsi), %xmm1
2496 movdqu %xmm0, 10(%rdi)
2497 movdqu %xmm1, (%rdi)
2498 ret
2499
/* Copy the 10 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2500 .p2align 4
2501L(bwd_write_10bytes):
2502 mov 2(%rsi), %rdx
2503 mov (%rsi), %rcx
2504 mov %rdx, 2(%rdi)
2505 mov %rcx, (%rdi)
2506 ret
2507
/* Backward tail chain for lengths 137, 121, ..., 25.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2508 .p2align 4
2509L(bwd_write_137bytes):
2510 lddqu 121(%rsi), %xmm0
2511 movdqu %xmm0, 121(%rdi)
2512L(bwd_write_121bytes):
2513 lddqu 105(%rsi), %xmm0
2514 movdqu %xmm0, 105(%rdi)
2515L(bwd_write_105bytes):
2516 lddqu 89(%rsi), %xmm0
2517 movdqu %xmm0, 89(%rdi)
2518L(bwd_write_89bytes):
2519 lddqu 73(%rsi), %xmm0
2520 movdqu %xmm0, 73(%rdi)
2521L(bwd_write_73bytes):
2522 lddqu 57(%rsi), %xmm0
2523 movdqu %xmm0, 57(%rdi)
2524L(bwd_write_57bytes):
2525 lddqu 41(%rsi), %xmm0
2526 movdqu %xmm0, 41(%rdi)
2527L(bwd_write_41bytes):
2528 lddqu 25(%rsi), %xmm0
2529 movdqu %xmm0, 25(%rdi)
2530L(bwd_write_25bytes):
     /* First 25 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2531 lddqu 9(%rsi), %xmm0
2532 lddqu (%rsi), %xmm1
2533 movdqu %xmm0, 9(%rdi)
2534 movdqu %xmm1, (%rdi)
2535 ret
2536
/* Copy the 9 bytes starting at %rsi to %rdi with two overlapping
   8-byte moves; both loads are done before the stores.  Uses %rdx and
   %rcx as scratch.  */
2537 .p2align 4
2538L(bwd_write_9bytes):
2539 mov 1(%rsi), %rdx
2540 mov (%rsi), %rcx
2541 mov %rdx, 1(%rdi)
2542 mov %rcx, (%rdi)
2543 ret
2544
/* Backward tail chain for lengths 136, 120, ..., 24.  %rsi and %rdi
   mark the start of the region.  Each entry copies the highest
   remaining 16-byte chunk through %xmm0 and falls through to the entry
   16 bytes shorter.  */
2545 .p2align 4
2546L(bwd_write_136bytes):
2547 lddqu 120(%rsi), %xmm0
2548 movdqu %xmm0, 120(%rdi)
2549L(bwd_write_120bytes):
2550 lddqu 104(%rsi), %xmm0
2551 movdqu %xmm0, 104(%rdi)
2552L(bwd_write_104bytes):
2553 lddqu 88(%rsi), %xmm0
2554 movdqu %xmm0, 88(%rdi)
2555L(bwd_write_88bytes):
2556 lddqu 72(%rsi), %xmm0
2557 movdqu %xmm0, 72(%rdi)
2558L(bwd_write_72bytes):
2559 lddqu 56(%rsi), %xmm0
2560 movdqu %xmm0, 56(%rdi)
2561L(bwd_write_56bytes):
2562 lddqu 40(%rsi), %xmm0
2563 movdqu %xmm0, 40(%rdi)
2564L(bwd_write_40bytes):
2565 lddqu 24(%rsi), %xmm0
2566 movdqu %xmm0, 24(%rdi)
2567L(bwd_write_24bytes):
     /* First 24 bytes as two overlapping 16-byte chunks; both are
        loaded before either is stored.  */
2568 lddqu 8(%rsi), %xmm0
2569 lddqu (%rsi), %xmm1
2570 movdqu %xmm0, 8(%rdi)
2571 movdqu %xmm1, (%rdi)
2572 ret
2573
/* Copy exactly 8 bytes from (%rsi) to (%rdi) with a single 64-bit
   load/store pair.  Clobbers %rdx.  */
2574 .p2align 4
2575L(bwd_write_8bytes):
2576 mov (%rsi), %rdx
2577 mov %rdx, (%rdi)
2578 ret
2579
/* Fall-through chain of entry points copying 135, 119, 103, 87, 71,
   55, 39 or 23 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2580 .p2align 4
2581L(bwd_write_135bytes):
2582 lddqu 119(%rsi), %xmm0
2583 movdqu %xmm0, 119(%rdi)
2584L(bwd_write_119bytes):
2585 lddqu 103(%rsi), %xmm0
2586 movdqu %xmm0, 103(%rdi)
2587L(bwd_write_103bytes):
2588 lddqu 87(%rsi), %xmm0
2589 movdqu %xmm0, 87(%rdi)
2590L(bwd_write_87bytes):
2591 lddqu 71(%rsi), %xmm0
2592 movdqu %xmm0, 71(%rdi)
2593L(bwd_write_71bytes):
2594 lddqu 55(%rsi), %xmm0
2595 movdqu %xmm0, 55(%rdi)
2596L(bwd_write_55bytes):
2597 lddqu 39(%rsi), %xmm0
2598 movdqu %xmm0, 39(%rdi)
2599L(bwd_write_39bytes):
2600 lddqu 23(%rsi), %xmm0
2601 movdqu %xmm0, 23(%rdi)
2602L(bwd_write_23bytes):
/* Last 23 bytes: two overlapping 16-byte chunks (bytes 7..22 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2603 lddqu 7(%rsi), %xmm0
2604 lddqu (%rsi), %xmm1
2605 movdqu %xmm0, 7(%rdi)
2606 movdqu %xmm1, (%rdi)
2607 ret
2608
/* Copy exactly 7 bytes from (%rsi) to (%rdi) with two overlapping
   4-byte moves covering bytes 0..3 and 3..6.  Both loads are issued
   before either store.  Clobbers %rcx and %rdx.  */
	.p2align 4
L(bwd_write_7bytes):
	mov	(%rsi), %ecx		/* bytes 0..3 */
	mov	3(%rsi), %edx		/* bytes 3..6 */
	mov	%ecx, (%rdi)
	mov	%edx, 3(%rdi)
	ret
2616
/* Fall-through chain of entry points copying 134, 118, 102, 86, 70,
   54, 38 or 22 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2617 .p2align 4
2618L(bwd_write_134bytes):
2619 lddqu 118(%rsi), %xmm0
2620 movdqu %xmm0, 118(%rdi)
2621L(bwd_write_118bytes):
2622 lddqu 102(%rsi), %xmm0
2623 movdqu %xmm0, 102(%rdi)
2624L(bwd_write_102bytes):
2625 lddqu 86(%rsi), %xmm0
2626 movdqu %xmm0, 86(%rdi)
2627L(bwd_write_86bytes):
2628 lddqu 70(%rsi), %xmm0
2629 movdqu %xmm0, 70(%rdi)
2630L(bwd_write_70bytes):
2631 lddqu 54(%rsi), %xmm0
2632 movdqu %xmm0, 54(%rdi)
2633L(bwd_write_54bytes):
2634 lddqu 38(%rsi), %xmm0
2635 movdqu %xmm0, 38(%rdi)
2636L(bwd_write_38bytes):
2637 lddqu 22(%rsi), %xmm0
2638 movdqu %xmm0, 22(%rdi)
2639L(bwd_write_22bytes):
/* Last 22 bytes: two overlapping 16-byte chunks (bytes 6..21 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2640 lddqu 6(%rsi), %xmm0
2641 lddqu (%rsi), %xmm1
2642 movdqu %xmm0, 6(%rdi)
2643 movdqu %xmm1, (%rdi)
2644 ret
2645
/* Copy exactly 6 bytes from (%rsi) to (%rdi) with two overlapping
   4-byte moves covering bytes 0..3 and 2..5.  Both loads are issued
   before either store.  Clobbers %rcx and %rdx.  */
	.p2align 4
L(bwd_write_6bytes):
	mov	(%rsi), %ecx		/* bytes 0..3 */
	mov	2(%rsi), %edx		/* bytes 2..5 */
	mov	%ecx, (%rdi)
	mov	%edx, 2(%rdi)
	ret
2653
/* Fall-through chain of entry points copying 133, 117, 101, 85, 69,
   53, 37 or 21 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2654 .p2align 4
2655L(bwd_write_133bytes):
2656 lddqu 117(%rsi), %xmm0
2657 movdqu %xmm0, 117(%rdi)
2658L(bwd_write_117bytes):
2659 lddqu 101(%rsi), %xmm0
2660 movdqu %xmm0, 101(%rdi)
2661L(bwd_write_101bytes):
2662 lddqu 85(%rsi), %xmm0
2663 movdqu %xmm0, 85(%rdi)
2664L(bwd_write_85bytes):
2665 lddqu 69(%rsi), %xmm0
2666 movdqu %xmm0, 69(%rdi)
2667L(bwd_write_69bytes):
2668 lddqu 53(%rsi), %xmm0
2669 movdqu %xmm0, 53(%rdi)
2670L(bwd_write_53bytes):
2671 lddqu 37(%rsi), %xmm0
2672 movdqu %xmm0, 37(%rdi)
2673L(bwd_write_37bytes):
2674 lddqu 21(%rsi), %xmm0
2675 movdqu %xmm0, 21(%rdi)
2676L(bwd_write_21bytes):
/* Last 21 bytes: two overlapping 16-byte chunks (bytes 5..20 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2677 lddqu 5(%rsi), %xmm0
2678 lddqu (%rsi), %xmm1
2679 movdqu %xmm0, 5(%rdi)
2680 movdqu %xmm1, (%rdi)
2681 ret
2682
/* Copy exactly 5 bytes from (%rsi) to (%rdi) with two overlapping
   4-byte moves covering bytes 0..3 and 1..4.  Both loads are issued
   before either store.  Clobbers %rcx and %rdx.  */
	.p2align 4
L(bwd_write_5bytes):
	mov	(%rsi), %ecx		/* bytes 0..3 */
	mov	1(%rsi), %edx		/* bytes 1..4 */
	mov	%ecx, (%rdi)
	mov	%edx, 1(%rdi)
	ret
2690
/* Fall-through chain of entry points copying 132, 116, 100, 84, 68,
   52, 36 or 20 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2691 .p2align 4
2692L(bwd_write_132bytes):
2693 lddqu 116(%rsi), %xmm0
2694 movdqu %xmm0, 116(%rdi)
2695L(bwd_write_116bytes):
2696 lddqu 100(%rsi), %xmm0
2697 movdqu %xmm0, 100(%rdi)
2698L(bwd_write_100bytes):
2699 lddqu 84(%rsi), %xmm0
2700 movdqu %xmm0, 84(%rdi)
2701L(bwd_write_84bytes):
2702 lddqu 68(%rsi), %xmm0
2703 movdqu %xmm0, 68(%rdi)
2704L(bwd_write_68bytes):
2705 lddqu 52(%rsi), %xmm0
2706 movdqu %xmm0, 52(%rdi)
2707L(bwd_write_52bytes):
2708 lddqu 36(%rsi), %xmm0
2709 movdqu %xmm0, 36(%rdi)
2710L(bwd_write_36bytes):
2711 lddqu 20(%rsi), %xmm0
2712 movdqu %xmm0, 20(%rdi)
2713L(bwd_write_20bytes):
/* Last 20 bytes: two overlapping 16-byte chunks (bytes 4..19 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2714 lddqu 4(%rsi), %xmm0
2715 lddqu (%rsi), %xmm1
2716 movdqu %xmm0, 4(%rdi)
2717 movdqu %xmm1, (%rdi)
2718 ret
2719
/* Copy exactly 4 bytes from (%rsi) to (%rdi) with a single 32-bit
   load/store pair.  Clobbers %rdx (the 32-bit load zero-extends).  */
2720 .p2align 4
2721L(bwd_write_4bytes):
2722 mov (%rsi), %edx
2723 mov %edx, (%rdi)
2724 ret
2725
/* Fall-through chain of entry points copying 131, 115, 99, 83, 67,
   51, 35 or 19 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2726 .p2align 4
2727L(bwd_write_131bytes):
2728 lddqu 115(%rsi), %xmm0
2729 movdqu %xmm0, 115(%rdi)
2730L(bwd_write_115bytes):
2731 lddqu 99(%rsi), %xmm0
2732 movdqu %xmm0, 99(%rdi)
2733L(bwd_write_99bytes):
2734 lddqu 83(%rsi), %xmm0
2735 movdqu %xmm0, 83(%rdi)
2736L(bwd_write_83bytes):
2737 lddqu 67(%rsi), %xmm0
2738 movdqu %xmm0, 67(%rdi)
2739L(bwd_write_67bytes):
2740 lddqu 51(%rsi), %xmm0
2741 movdqu %xmm0, 51(%rdi)
2742L(bwd_write_51bytes):
2743 lddqu 35(%rsi), %xmm0
2744 movdqu %xmm0, 35(%rdi)
2745L(bwd_write_35bytes):
2746 lddqu 19(%rsi), %xmm0
2747 movdqu %xmm0, 19(%rdi)
2748L(bwd_write_19bytes):
/* Last 19 bytes: two overlapping 16-byte chunks (bytes 3..18 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2749 lddqu 3(%rsi), %xmm0
2750 lddqu (%rsi), %xmm1
2751 movdqu %xmm0, 3(%rdi)
2752 movdqu %xmm1, (%rdi)
2753 ret
2754
/* Copy exactly 3 bytes from (%rsi) to (%rdi) with two overlapping
   2-byte moves covering bytes 1..2 and 0..1.  Both loads are issued
   before either store.  Clobbers %rcx and %rdx.

   The loads use movzwl into the full 32-bit registers rather than
   16-bit moves into %dx/%cx: a 16-bit destination is a partial
   register write that merges with the stale upper bits and so carries
   a false dependency on the previous register contents.  This matches
   the zero-extending loads used by L(bwd_write_2bytes) and
   L(bwd_write_1bytes).  Only the low 16 bits are stored, so the bytes
   written are unchanged.  */
	.p2align 4
L(bwd_write_3bytes):
	movzwl	1(%rsi), %edx		/* bytes 1..2 */
	movzwl	(%rsi), %ecx		/* bytes 0..1 */
	mov	%dx, 1(%rdi)
	mov	%cx, (%rdi)
	ret
2762
/* Fall-through chain of entry points copying 130, 114, 98, 82, 66,
   50, 34 or 18 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2763 .p2align 4
2764L(bwd_write_130bytes):
2765 lddqu 114(%rsi), %xmm0
2766 movdqu %xmm0, 114(%rdi)
2767L(bwd_write_114bytes):
2768 lddqu 98(%rsi), %xmm0
2769 movdqu %xmm0, 98(%rdi)
2770L(bwd_write_98bytes):
2771 lddqu 82(%rsi), %xmm0
2772 movdqu %xmm0, 82(%rdi)
2773L(bwd_write_82bytes):
2774 lddqu 66(%rsi), %xmm0
2775 movdqu %xmm0, 66(%rdi)
2776L(bwd_write_66bytes):
2777 lddqu 50(%rsi), %xmm0
2778 movdqu %xmm0, 50(%rdi)
2779L(bwd_write_50bytes):
2780 lddqu 34(%rsi), %xmm0
2781 movdqu %xmm0, 34(%rdi)
2782L(bwd_write_34bytes):
2783 lddqu 18(%rsi), %xmm0
2784 movdqu %xmm0, 18(%rdi)
2785L(bwd_write_18bytes):
/* Last 18 bytes: two overlapping 16-byte chunks (bytes 2..17 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2786 lddqu 2(%rsi), %xmm0
2787 lddqu (%rsi), %xmm1
2788 movdqu %xmm0, 2(%rdi)
2789 movdqu %xmm1, (%rdi)
2790 ret
2791
/* Copy exactly 2 bytes from (%rsi) to (%rdi).  The load zero-extends
   into %edx, so the whole register is written; only the low 16 bits
   are stored.  Clobbers %rdx.  */
2792 .p2align 4
2793L(bwd_write_2bytes):
2794 movzwl (%rsi), %edx
2795 mov %dx, (%rdi)
2796 ret
2797
/* Fall-through chain of entry points copying 129, 113, 97, 81, 65,
   49, 33 or 17 bytes from (%rsi) to (%rdi).  Each entry moves one
   unaligned 16-byte chunk at offset N-16 through %xmm0 and then falls
   into the next lower entry, so chunks are written from the highest
   offset downwards.  These labels are the targets of the matching
   slots in L(table_144_bytes_bwd).  Clobbers %xmm0 and %xmm1.  */
2798 .p2align 4
2799L(bwd_write_129bytes):
2800 lddqu 113(%rsi), %xmm0
2801 movdqu %xmm0, 113(%rdi)
2802L(bwd_write_113bytes):
2803 lddqu 97(%rsi), %xmm0
2804 movdqu %xmm0, 97(%rdi)
2805L(bwd_write_97bytes):
2806 lddqu 81(%rsi), %xmm0
2807 movdqu %xmm0, 81(%rdi)
2808L(bwd_write_81bytes):
2809 lddqu 65(%rsi), %xmm0
2810 movdqu %xmm0, 65(%rdi)
2811L(bwd_write_65bytes):
2812 lddqu 49(%rsi), %xmm0
2813 movdqu %xmm0, 49(%rdi)
2814L(bwd_write_49bytes):
2815 lddqu 33(%rsi), %xmm0
2816 movdqu %xmm0, 33(%rdi)
2817L(bwd_write_33bytes):
2818 lddqu 17(%rsi), %xmm0
2819 movdqu %xmm0, 17(%rdi)
2820L(bwd_write_17bytes):
/* Last 17 bytes: two overlapping 16-byte chunks (bytes 1..16 and
   0..15).  Both are loaded before either is stored.
   NOTE(review): presumably so an overlapping memmove source is read
   before it is overwritten -- confirm against the callers.  */
2821 lddqu 1(%rsi), %xmm0
2822 lddqu (%rsi), %xmm1
2823 movdqu %xmm0, 1(%rdi)
2824 movdqu %xmm1, (%rdi)
2825 ret
2826
/* Copy exactly 1 byte from (%rsi) to (%rdi).  The load zero-extends
   into %edx, so the whole register is written; only the low byte is
   stored.  Clobbers %rdx.  */
2827 .p2align 4
2828L(bwd_write_1bytes):
2829 movzbl (%rsi), %edx
2830 mov %dl, (%rdi)
2831 ret
2832
2833END (MEMCPY)
2834
/* Jump table for the backward tail-copy entry points, placed in the
   read-only section .rodata.ssse3.  It holds 144 entries of 32 bits
   each; entry N is the offset of L(bwd_write_<N>bytes) from the start
   of this table (JMPTBL (I, B) is I - B), for N = 0..143.  The entry
   order is the index, so it must not be rearranged.  ALIGN (3) is
   .p2align 3, i.e. 8-byte alignment, unless ALIGN was defined before
   this file's default.  */
2835 .section .rodata.ssse3,"a",@progbits
2836 ALIGN (3)
2837L(table_144_bytes_bwd):
2838 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2839 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2840 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2841 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2842 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2843 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2844 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2845 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2846 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2847 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2848 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2849 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2850 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2851 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2852 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2853 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2854 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2855 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2982
/* Jump table for the forward tail-copy entry points.  It holds 144
   entries of 32 bits each; entry N is the offset of
   L(fwd_write_<N>bytes) from the start of this table (JMPTBL (I, B)
   is I - B), for N = 0..143.  The entry order is the index, so it
   must not be rearranged.  ALIGN (3) is .p2align 3, i.e. 8-byte
   alignment, unless ALIGN was defined before this file's default.  */
2983 ALIGN (3)
2984L(table_144_bytes_fwd):
2985 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
2986 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
2987 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
2988 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
2989 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
2990 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
2991 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
2992 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
2993 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
2994 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
2995 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
2996 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
2997 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
2998 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
2999 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3000 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3001 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3002 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3129
/* Jump table with 16 entries of 32 bits each; entry K is the offset
   of L(shl_<K>) from the start of this table (JMPTBL (I, B) is
   I - B), for K = 0..15.  The entry order is the index, so it must
   not be rearranged.
   NOTE(review): presumably indexed by the 0..15 byte misalignment
   handled by the forward copy loops -- confirm at the dispatch
   site.  */
3130 ALIGN (3)
3131L(shl_table_fwd):
3132 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3133 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3134 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3135 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3136 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3137 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3138 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3139 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3140 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3141 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3142 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3143 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3144 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3145 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3146 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3147 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3148
/* Jump table with 16 entries of 32 bits each; entry K is the offset
   of L(shl_<K>_bwd) from the start of this table (JMPTBL (I, B) is
   I - B), for K = 0..15.  The entry order is the index, so it must
   not be rearranged.
   NOTE(review): presumably indexed by the 0..15 byte misalignment
   handled by the backward copy loops -- confirm at the dispatch
   site.  */
3149 ALIGN (3)
3150L(shl_table_bwd):
3151 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3152 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3153 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3154 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3155 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3156 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3157 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3158 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3159 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3160 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3161 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3162 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3163 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3164 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3165 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3166 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3167
3168#endif