Warning: not completed and not yet tested.
movl DEST(%esp), %edx
movl SRC(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndldx SRC(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+#endif
testb $0xff, (%ecx) /* Is source string empty? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+#endif
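Note on the pattern above: bndldx reloads the bounds previously stored for the pointer argument (keyed by the stack slot address and the pointer value), bndcl faults if the access falls below the lower bound, and bndcu faults if it lies above the upper bound. A minimal C model of the same entry checks, assuming GCC's Pointer Bounds Checker builtins (-fcheck-pointer-bounds -mmpx); this sketch is not part of the patch:

#include <stddef.h>

/* Sketch only: the compiler emits bndldx for the argument load itself;
   the two builtins below expand to bndcl and bndcu against the
   pointer's current bounds.  */
static void
check_first_byte (const char *p)
{
#ifdef __CHKP__
  __builtin___bnd_chk_ptr_lbounds (p);  /* bndcl (%p), %bndN */
  __builtin___bnd_chk_ptr_ubounds (p);  /* bndcu (%p), %bndN */
#endif
}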
/* Test the first bytes separately until destination is aligned. */
testl $3, %edx /* destination pointer aligned? */
L(4): addl $16,%edx /* increment destination pointer for round */
-L(1): movl (%edx), %eax /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+#endif
+ movl (%edx), %eax /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
	/* If you compare this with the algorithm in memchr.S you will
	   notice several differences.  */
/* If at least one byte of the word is C we don't get 0 in %ecx. */
jnz L(3)
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+#endif
movl 4(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
incl %edi /* add 1: if one carry bit was *not* set
	     the addition will not result in 0. */
jnz L(5) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+#endif
movl 8(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
incl %edi /* add 1: if one carry bit was *not* set
	     the addition will not result in 0. */
jnz L(6) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+#endif
movl 12(%edx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
/* Now we have to align the source pointer. */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
testl $3, %ecx /* pointer correctly aligned? */
jz L(29) /* yes => start copy loop */
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu (%ecx, %edx), %bnd0
+#endif
movb (%ecx), %al /* get first byte */
movb %al, (%ecx,%edx) /* and store it */
andb %al, %al /* is byte NUL? */
ALIGN(4)
-L(28): movl %eax, 12(%ecx,%edx)/* store word at destination */
+L(28):
+#ifdef __CHKP__
+ bndcu 12(%ecx, %edx), %bnd0
+#endif
+ movl %eax, 12(%ecx,%edx)/* store word at destination */
addl $16, %ecx /* adjust pointer for full round */
-L(29): movl (%ecx), %eax /* get word from source */
+L(29):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+#endif
+ movl (%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
incl %edi /* add 1: if one carry bit was *not* set
	     the addition will not result in 0. */
jnz L(9) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
movl %eax, (%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+#endif
movl 4(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
incl %edi /* add 1: if one carry bit was *not* set
	     the addition will not result in 0. */
jnz L(91) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 4(%ecx, %edx), %bnd0
+#endif
movl %eax, 4(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+#endif
movl 8(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
incl %edi /* add 1: if one carry bit was *not* set
	     the addition will not result in 0. */
jnz L(92) /* one byte is NUL => stop copying */
+#ifdef __CHKP__
+ bndcu 8(%ecx, %edx), %bnd0
+#endif
movl %eax, 8(%ecx,%edx) /* store word to destination */
+#ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+#endif
movl 12(%ecx), %eax /* get word from source */
movl $0xfefefeff, %edi /* magic value */
addl %eax, %edi /* add the magic value to the word.  We get
		   carry bits reported for each byte which
		   is *not* 0 */
L(92): addl $4, %ecx
L(91): addl $4, %ecx
-L(9): movb %al, (%ecx,%edx) /* store first byte of last word */
+L(9):
+#ifdef __CHKP__
+ bndcu (%ecx, %edx), %bnd0
+#endif
+ movb %al, (%ecx,%edx) /* store first byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
+#ifdef __CHKP__
+ bndcu 1(%ecx, %edx), %bnd0
+#endif
movb %ah, 1(%ecx,%edx) /* store second byte of last word */
orb %ah, %ah /* is it NUL? */
jz L(8) /* yes => return */
shrl $16, %eax /* make upper bytes accessible */
+#ifdef __CHKP__
+ bndcu 2(%ecx, %edx), %bnd0
+#endif
movb %al, 2(%ecx,%edx) /* store third byte of last word */
orb %al, %al /* is it NUL? */
jz L(8) /* yes => return */
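The copy loop above depends on the 0xfefefeff carry trick: adding the magic value carries out of every byte that is not zero, so a broken carry chain marks a NUL byte. A self-contained C model of the test (illustration only, not part of the patch):

#include <stdint.h>
#include <stdio.h>

/* Returns nonzero iff some byte of W is zero.  The asm reads the
   top-byte carry from CF; here we widen to 64 bits to observe it.  */
static int
has_zero_byte (uint32_t w)
{
  const uint32_t magic = 0xfefefeff;
  uint64_t sum = (uint64_t) w + magic;  /* addl: carry out of each byte that is not 0 */
  if ((sum >> 32) == 0)                 /* jnc: the carry chain was broken */
    return 1;
  uint32_t t = ((uint32_t) sum ^ w) | magic; /* keep only the per-byte carry bits */
  return t + 1 != 0;                    /* incl: nonzero iff a carry was missing */
}

int
main (void)
{
  printf ("%d %d %d\n",
          has_zero_byte (0x41414141),   /* "AAAA" -> 0 */
          has_zero_byte (0x00414141),   /* NUL in top byte -> 1 */
          has_zero_byte (0x41004141));  /* NUL in a middle byte -> 1 */
  return 0;
}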
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
movl %eax, %edi /* duplicate string pointer for later */
cfi_rel_offset (edi, 12)
xorb %dl, %cl /* load single byte and test for NUL */
je L(3) /* yes => return NULL */
+#ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+#endif
movb 1(%eax), %cl /* load single byte */
incl %eax
jne L(11)
-L(0): movb (%eax), %cl /* load single byte */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movb (%eax), %cl /* load single byte */
cmpb %cl, %dl /* is byte == C? */
je L(out) /* aligned => return pointer */
/* The four instructions up to `L1' will not be executed in the loop
   because the same code is found at the end of the loop, but
   there it is executed in parallel with other instructions. */
-L(11): movl (%eax), %ecx
+L(11):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx
movl $magic, %ebp
movl $magic, %edi
movl $magic, %esi /* load magic value */
xorl %edx, %ebx /* clear words which are C */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi /* (word+magic) */
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
movl $magic, %esi
xorl %edx, %ebx
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx
addl %ebx, %esi
cfi_rel_offset (edi, 8)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edi,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+#endif
xorl %eax, %eax
leal -1(%esi), %ecx
/* 0xb is the distance between 2: and 1: but we avoid writing
1f-2b because the assembler generates worse code. */
leal 0xb(%edx,%ecx,8), %ecx
+# ifdef __CHKP__
+ jmp L(1)
+# endif
#else
leal 1f(,%ecx,8), %ecx
#endif
ENTRY (strlen)
movl STR(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcu (%eax),%bnd0
+#endif
movl $3, %edx /* load mask (= 3) */
andl %eax, %edx /* separate last two bits of address */
jz L(1) /* aligned => start loop */
jp L(0) /* exactly two bits set */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
jz L(1)
-L(0): cmpb %dh, (%eax) /* is byte NUL? */
+L(0):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ cmpb %dh, (%eax) /* is byte NUL? */
je L(2) /* yes => return */
incl %eax /* increment pointer */
incl %eax /* increment pointer */
-L(2): subl STR(%esp), %eax /* now compute the length as difference
+L(2):
+#ifdef __CHKP__
+ bndcu (%eax),%bnd0
+#endif
+ subl STR(%esp), %eax /* now compute the length as difference
between start and terminating NUL
character */
ret
movl BLK1(%esp), %eax
movl BLK2(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+#endif
cmpl $1, %ecx
jne L(not_1)
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
movzbl (%eax), %ecx /* LEN == 1 */
cmpb (%edx), %cl
jne L(neq)
cfi_rel_offset (ebx, 0)
L(not_1):
jl L(bye) /* LEN == 0 */
+#ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%edx), %bnd1
+#endif
pushl %esi
cfi_adjust_cfa_offset (4)
ALIGN (4)
L(28bytes):
+#ifdef __CHKP__
+ bndcu -28(%esi), %bnd0
+ bndcu -28(%edx), %bnd1
+#endif
movl -28(%esi), %eax
movl -28(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(24bytes):
+#ifdef __CHKP__
+ bndcu -24(%esi), %bnd0
+ bndcu -24(%edx), %bnd1
+#endif
movl -24(%esi), %eax
movl -24(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(20bytes):
+#ifdef __CHKP__
+ bndcu -20(%esi), %bnd0
+ bndcu -20(%edx), %bnd1
+#endif
movl -20(%esi), %eax
movl -20(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(16bytes):
+#ifdef __CHKP__
+ bndcu -16(%esi), %bnd0
+ bndcu -16(%edx), %bnd1
+#endif
movl -16(%esi), %eax
movl -16(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(12bytes):
+#ifdef __CHKP__
+ bndcu -12(%esi), %bnd0
+ bndcu -12(%edx), %bnd1
+#endif
movl -12(%esi), %eax
movl -12(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(8bytes):
+#ifdef __CHKP__
+ bndcu -8(%esi), %bnd0
+ bndcu -8(%edx), %bnd1
+#endif
movl -8(%esi), %eax
movl -8(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(4bytes):
+#ifdef __CHKP__
+ bndcu -4(%esi), %bnd0
+ bndcu -4(%edx), %bnd1
+#endif
movl -4(%esi), %eax
movl -4(%edx), %ecx
cmpl %ecx, %eax
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(29bytes):
+#ifdef __CHKP__
+ bndcu -29(%esi), %bnd0
+ bndcu -29(%edx), %bnd1
+#endif
movl -29(%esi), %eax
movl -29(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(25bytes):
+#ifdef __CHKP__
+ bndcu -25(%esi), %bnd0
+ bndcu -25(%edx), %bnd1
+#endif
movl -25(%esi), %eax
movl -25(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(21bytes):
+#ifdef __CHKP__
+ bndcu -21(%esi), %bnd0
+ bndcu -21(%edx), %bnd1
+#endif
movl -21(%esi), %eax
movl -21(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(17bytes):
+#ifdef __CHKP__
+ bndcu -17(%esi), %bnd0
+ bndcu -17(%edx), %bnd1
+#endif
movl -17(%esi), %eax
movl -17(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(13bytes):
+#ifdef __CHKP__
+ bndcu -13(%esi), %bnd0
+ bndcu -13(%edx), %bnd1
+#endif
movl -13(%esi), %eax
movl -13(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(9bytes):
+#ifdef __CHKP__
+ bndcu -9(%esi), %bnd0
+ bndcu -9(%edx), %bnd1
+#endif
movl -9(%esi), %eax
movl -9(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(5bytes):
+#ifdef __CHKP__
+ bndcu -5(%esi), %bnd0
+ bndcu -5(%edx), %bnd1
+#endif
movl -5(%esi), %eax
movl -5(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(1bytes):
+#ifdef __CHKP__
+ bndcu -1(%esi), %bnd0
+ bndcu -1(%edx), %bnd1
+#endif
movzbl -1(%esi), %eax
cmpb -1(%edx), %al
jne L(set)
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(30bytes):
+#ifdef __CHKP__
+ bndcu -30(%esi), %bnd0
+ bndcu -30(%edx), %bnd1
+#endif
movl -30(%esi), %eax
movl -30(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(26bytes):
+#ifdef __CHKP__
+ bndcu -26(%esi), %bnd0
+ bndcu -26(%edx), %bnd1
+#endif
movl -26(%esi), %eax
movl -26(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(22bytes):
+#ifdef __CHKP__
+ bndcu -22(%esi), %bnd0
+ bndcu -22(%edx), %bnd1
+#endif
movl -22(%esi), %eax
movl -22(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(18bytes):
+#ifdef __CHKP__
+ bndcu -18(%esi), %bnd0
+ bndcu -18(%edx), %bnd1
+#endif
movl -18(%esi), %eax
movl -18(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(14bytes):
+#ifdef __CHKP__
+ bndcu -14(%esi), %bnd0
+ bndcu -14(%edx), %bnd1
+#endif
movl -14(%esi), %eax
movl -14(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(10bytes):
+#ifdef __CHKP__
+ bndcu -10(%esi), %bnd0
+ bndcu -10(%edx), %bnd1
+#endif
movl -10(%esi), %eax
movl -10(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(6bytes):
+#ifdef __CHKP__
+ bndcu -6(%esi), %bnd0
+ bndcu -6(%edx), %bnd1
+#endif
movl -6(%esi), %eax
movl -6(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(2bytes):
+#ifdef __CHKP__
+ bndcu -2(%esi), %bnd0
+ bndcu -2(%edx), %bnd1
+#endif
movzwl -2(%esi), %eax
movzwl -2(%edx), %ecx
cmpb %cl, %al
cfi_rel_offset (esi, 0)
cfi_rel_offset (ebx, 4)
L(31bytes):
+#ifdef __CHKP__
+ bndcu -31(%esi), %bnd0
+ bndcu -31(%edx), %bnd1
+#endif
movl -31(%esi), %eax
movl -31(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(27bytes):
+#ifdef __CHKP__
+ bndcu -27(%esi), %bnd0
+ bndcu -27(%edx), %bnd1
+#endif
movl -27(%esi), %eax
movl -27(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(23bytes):
+#ifdef __CHKP__
+ bndcu -23(%esi), %bnd0
+ bndcu -23(%edx), %bnd1
+#endif
movl -23(%esi), %eax
movl -23(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(19bytes):
+#ifdef __CHKP__
+ bndcu -19(%esi), %bnd0
+ bndcu -19(%edx), %bnd1
+#endif
movl -19(%esi), %eax
movl -19(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(15bytes):
+#ifdef __CHKP__
+ bndcu -15(%esi), %bnd0
+ bndcu -15(%edx), %bnd1
+#endif
movl -15(%esi), %eax
movl -15(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(11bytes):
+#ifdef __CHKP__
+ bndcu -11(%esi), %bnd0
+ bndcu -11(%edx), %bnd1
+#endif
movl -11(%esi), %eax
movl -11(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(7bytes):
+#ifdef __CHKP__
+ bndcu -7(%esi), %bnd0
+ bndcu -7(%edx), %bnd1
+#endif
movl -7(%esi), %eax
movl -7(%edx), %ecx
cmpl %ecx, %eax
jne L(find_diff)
L(3bytes):
+#ifdef __CHKP__
+ bndcu -3(%esi), %bnd0
+ bndcu -3(%edx), %bnd1
+#endif
movzwl -3(%esi), %eax
movzwl -3(%edx), %ecx
cmpb %cl, %al
L(32bytesormore):
subl $32, %ecx
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edx), %bnd1
+#endif
movl (%esi), %eax
cmpl (%edx), %eax
jne L(load_ecx)
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+ bndcu 4(%edx), %bnd1
+#endif
movl 4(%esi), %eax
cmpl 4(%edx), %eax
jne L(load_ecx_4)
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+ bndcu 8(%edx), %bnd1
+#endif
movl 8(%esi), %eax
cmpl 8(%edx), %eax
jne L(load_ecx_8)
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+ bndcu 12(%edx), %bnd1
+#endif
movl 12(%esi), %eax
cmpl 12(%edx), %eax
jne L(load_ecx_12)
+#ifdef __CHKP__
+ bndcu 16(%esi), %bnd0
+ bndcu 16(%edx), %bnd1
+#endif
movl 16(%esi), %eax
cmpl 16(%edx), %eax
jne L(load_ecx_16)
+#ifdef __CHKP__
+ bndcu 20(%esi), %bnd0
+ bndcu 20(%edx), %bnd1
+#endif
movl 20(%esi), %eax
cmpl 20(%edx), %eax
jne L(load_ecx_20)
+#ifdef __CHKP__
+ bndcu 24(%esi), %bnd0
+ bndcu 24(%edx), %bnd1
+#endif
movl 24(%esi), %eax
cmpl 24(%edx), %eax
jne L(load_ecx_24)
+#ifdef __CHKP__
+ bndcu 28(%esi), %bnd0
+ bndcu 28(%edx), %bnd1
+#endif
movl 28(%esi), %eax
cmpl 28(%edx), %eax
jne L(load_ecx_28)
cfi_adjust_cfa_offset (4)
movl DEST(%esp), %edx
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
#if BZERO_P
xorl %eax, %eax /* fill with 0 */
#else
ifeq ($(subdir),string)
gen-as-const-headers += locale-defines.sym
-sysdep_routines += bzero-sse2 memset-sse2 memcpy-ssse3 mempcpy-ssse3 \
- memmove-ssse3 memcpy-ssse3-rep mempcpy-ssse3-rep \
- memmove-ssse3-rep bcopy-ssse3 bcopy-ssse3-rep \
+sysdep_routines += bzero-sse2 memset-sse2 \
memset-sse2-rep bzero-sse2-rep strcmp-ssse3 \
strcmp-sse4 strncmp-c strncmp-ssse3 strncmp-sse4 \
memcmp-ssse3 memcmp-sse4 strcasestr-nonascii varshift \
strnlen-sse2 strnlen-c \
strcasecmp_l-c strcasecmp-c strcasecmp_l-ssse3 \
strncase_l-c strncase-c strncase_l-ssse3 \
- strcasecmp_l-sse4 strncase_l-sse4
+ strcasecmp_l-sse4 strncase_l-sse4 mpx_memcpy_nobnd \
+ mpx_mempcpy_nobnd mpx_memmove_nobnd
ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
CFLAGS-varshift.c += -msse4
GLIBC_PRIVATE {
__get_cpu_features;
}
+%ifdef __CHKP__
+ GLIBC_2.14 {
+ mpx_memcpy_nobnd;
+ mpx_memmove_nobnd;
+ mpx_mempcpy_nobnd;
+ }
+%endif
}
--- /dev/null
+#include <stddef.h>
+#include <string.h>
+
+void
+bcopy (const void *src, void *dst, size_t n)
+{
+ memmove (dst, src, n);
+}
size_t i = 0;
/* Support sysdeps/i386/i686/multiarch/bcopy.S. */
- IFUNC_IMPL (i, name, bcopy,
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
- __bcopy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
- IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
+// IFUNC_IMPL (i, name, bcopy,
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3,
+// __bcopy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, bcopy, HAS_SSSE3, __bcopy_ssse3)
+// IFUNC_IMPL_ADD (array, i, bcopy, 1, __bcopy_ia32))
/* Support sysdeps/i386/i686/multiarch/bzero.S. */
IFUNC_IMPL (i, name, bzero,
IFUNC_IMPL_ADD (array, i, memcmp, 1, __memcmp_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove_chk.S. */
- IFUNC_IMPL (i, name, __memmove_chk,
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
- __memmove_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
- __memmove_chk_ia32))
+// IFUNC_IMPL (i, name, __memmove_chk,
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, HAS_SSSE3,
+// __memmove_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memmove_chk, 1,
+// __memmove_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memmove.S. */
- IFUNC_IMPL (i, name, memmove,
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
- __memmove_ssse3)
- IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
+// IFUNC_IMPL (i, name, memmove,
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memmove, HAS_SSSE3,
+// __memmove_ssse3)
+// IFUNC_IMPL_ADD (array, i, memmove, 1, __memmove_ia32))
/* Support sysdeps/i386/i686/multiarch/memrchr.S. */
IFUNC_IMPL (i, name, memrchr,
#ifdef SHARED
/* Support sysdeps/i386/i686/multiarch/memcpy_chk.S. */
- IFUNC_IMPL (i, name, __memcpy_chk,
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
- __memcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
- __memcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __memcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, HAS_SSSE3,
+// __memcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __memcpy_chk, 1,
+// __memcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/memcpy.S. */
- IFUNC_IMPL (i, name, memcpy,
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
- __memcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
+// IFUNC_IMPL (i, name, memcpy,
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3,
+// __memcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, memcpy, HAS_SSSE3, __memcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, memcpy, 1, __memcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy_chk.S. */
- IFUNC_IMPL (i, name, __mempcpy_chk,
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
- __mempcpy_chk_ssse3)
- IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
- __mempcpy_chk_ia32))
+// IFUNC_IMPL (i, name, __mempcpy_chk,
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, HAS_SSSE3,
+// __mempcpy_chk_ssse3)
+// IFUNC_IMPL_ADD (array, i, __mempcpy_chk, 1,
+// __mempcpy_chk_ia32))
/* Support sysdeps/i386/i686/multiarch/mempcpy.S. */
- IFUNC_IMPL (i, name, mempcpy,
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3_rep)
- IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
- __mempcpy_ssse3)
- IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
+// IFUNC_IMPL (i, name, mempcpy,
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3_rep)
+// IFUNC_IMPL_ADD (array, i, mempcpy, HAS_SSSE3,
+// __mempcpy_ssse3)
+// IFUNC_IMPL_ADD (array, i, mempcpy, 1, __mempcpy_ia32))
/* Support sysdeps/i386/i686/multiarch/strlen.S. */
IFUNC_IMPL (i, name, strlen,
# endif
mov %ecx, %eax
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
punpcklbw %xmm1, %xmm1
punpcklbw %xmm1, %xmm1
# ifndef USE_AS_RAWMEMCHR
sub %ecx, %edx
jbe L(return_null_1)
-# endif
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ ret
+# else
+ add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
+# endif
.p2align 4
L(unaligned_no_match_1):
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
# endif
pcmpeqb %xmm1, %xmm0
jnz L(matches)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqa 16(%edi), %xmm2
# else
+
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
movdqa 16(%edx), %xmm2
# endif
pcmpeqb %xmm1, %xmm2
jnz L(matches16)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 32(%edi), %bnd0
+# endif
movdqa 32(%edi), %xmm3
# else
+
+# ifdef __CHKP__
+ bndcu 32(%edx), %bnd0
+# endif
movdqa 32(%edx), %xmm3
# endif
pcmpeqb %xmm1, %xmm3
jnz L(matches32)
# ifndef USE_AS_RAWMEMCHR
+# ifdef __CHKP__
+ bndcu 48(%edi), %bnd0
+# endif
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu 48(%edx), %bnd0
+# endif
movdqa 48(%edx), %xmm4
# endif
pcmpeqb %xmm1, %xmm4
# ifndef USE_AS_RAWMEMCHR
sub $64, %edx
jbe L(exit_loop)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
movdqa 16(%edi), %xmm2
movdqa 32(%edi), %xmm3
movdqa 48(%edi), %xmm4
# else
+
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movdqa (%edx), %xmm0
movdqa 16(%edx), %xmm2
movdqa 32(%edx), %xmm3
# ifndef USE_AS_RAWMEMCHR
lea 48(%edi, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 48(%edx, %eax), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea -16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea -16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
add %edx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 16(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 16(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
bsf %eax, %eax
# ifndef USE_AS_RAWMEMCHR
lea 32(%eax, %edi), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
# else
lea 32(%eax, %edx), %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
ret
# endif
jbe L(less1bytes)
# endif
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
+
pxor %xmm0, %xmm0
cmp $64, %ecx
ja L(64bytesormore)
cmpb (%edx), %bl
jne L(nonzero)
+# ifdef __CHKP__
+ bndcu 1(%eax), %bnd0
+ bndcu 1(%edx), %bnd1
+# endif
mov 1(%eax), %bl
cmpb 1(%edx), %bl
jne L(nonzero)
cmp $2, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 2(%eax), %bnd0
+ bndcu 2(%edx), %bnd1
+# endif
mov 2(%eax), %bl
cmpb 2(%edx), %bl
jne L(nonzero)
cmp $3, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 3(%eax), %bnd0
+ bndcu 3(%edx), %bnd1
+# endif
mov 3(%eax), %bl
cmpb 3(%edx), %bl
jne L(nonzero)
cmp $4, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+ bndcu 4(%edx), %bnd1
+# endif
mov 4(%eax), %bl
cmpb 4(%edx), %bl
jne L(nonzero)
cmp $5, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 5(%eax), %bnd0
+ bndcu 5(%edx), %bnd1
+# endif
mov 5(%eax), %bl
cmpb 5(%edx), %bl
jne L(nonzero)
cmp $6, %ecx
jz L(0bytes)
+# ifdef __CHKP__
+ bndcu 6(%eax), %bnd0
+ bndcu 6(%edx), %bnd1
+# endif
mov 6(%eax), %bl
cmpb 6(%edx), %bl
je L(0bytes)
.p2align 4
L(less1bytes):
jb L(0bytesend)
+# ifdef __CHKP__
+ bndldx BLK1(%esp,%eax,1), %bnd0
+ bndldx BLK2(%esp,%edx,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcl (%edx), %bnd1
+ bndcu (%eax), %bnd0
+ bndcu (%edx), %bnd1
+# endif
movzbl (%eax), %eax
movzbl (%edx), %edx
sub %edx, %eax
ptest %xmm2, %xmm0
jnc L(find_16diff)
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+ bndcu 16(%edx), %bnd1
+# endif
movdqu 16(%eax), %xmm1
movdqu 16(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_32diff)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+ bndcu 32(%edx), %bnd1
+# endif
movdqu 32(%eax), %xmm1
movdqu 32(%edx), %xmm2
pxor %xmm1, %xmm2
ptest %xmm2, %xmm0
jnc L(find_48diff)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+ bndcu 48(%edx), %bnd1
+# endif
movdqu 48(%eax), %xmm1
movdqu 48(%edx), %xmm2
pxor %xmm1, %xmm2
--- /dev/null
+#include <stddef.h>
+
+void *
+__memcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+  /* Differently misaligned pointers can never both reach word
+     alignment, so fall back to a plain byte copy.  */
+  if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (__memcpy, __GI_memcpy)
+weak_alias (__memcpy, memcpy)
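A quick sanity harness for the fallback above (my own sketch, not part of the patch; link it against this file):

#include <assert.h>
#include <string.h>

extern void *__memcpy (void *dst, const void *src, size_t n);

int
main (void)
{
  char src[64], dst[64];
  for (int i = 0; i < 64; i++)
    src[i] = (char) i;
  /* Exercise the byte head, the word-at-a-time body, and the tail.  */
  for (int off = 0; off < 8; off++)
    for (size_t n = 0; n + off <= 40; n++)
      {
        memset (dst, 0, sizeof dst);
        __memcpy (dst + off, src + off, n);
        assert (memcmp (dst + off, src + off, n) == 0);
      }
  return 0;
}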
--- /dev/null
+#include <debug/memcpy_chk.c>
--- /dev/null
+#include <stddef.h>
+
+void *
+__memmove (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+ void *ret = dst;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ if (s < d)
+ {
+ // backward copying
+ d += n;
+ s += n;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ // forward copying
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (s < d)
+ {
+	  /* Number of bytes to copy singly so the pointers become word
+	     aligned: the misalignment of src + n, where the backward
+	     copy starts.  */
+	  offset_src = (offset_src + n) & (sizeof(size_t) - 1);
+ // backward copying
+ d += n;
+ s += n;
+ while (n-- && offset_src--)
+ *--d = *--s;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *--d1 = *--s1;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *--d = *--s;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ // forward copying
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ }
+ return ret;
+}
+
+weak_alias (__memmove, __GI_memmove)
+weak_alias (__memmove, memmove)
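The s < d test above picks the copy direction: with dst inside [src, src + n), a forward copy would re-read bytes it has already overwritten. Illustration with the standard memmove (not part of the patch):

#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[] = "abcdef";
  memmove (buf + 1, buf, 5);  /* overlapping, dst > src => backward copy */
  printf ("%s\n", buf);       /* prints "aabcde" */
  return 0;
}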
--- /dev/null
+#include <debug/memmove_chk.c>
--- /dev/null
+#include <stddef.h>
+
+void *
+mempcpy (void *dst, const void *src, size_t n)
+{
+ const char *s = src;
+ char *d = dst;
+  void *ret = (char *) dst + n;
+ size_t offset_src = ((size_t) s) & (sizeof(size_t) - 1);
+ size_t offset_dst = ((size_t) d) & (sizeof(size_t) - 1);
+
+ if (offset_src != offset_dst)
+ {
+ while (n--)
+ *d++ = *s++;
+ }
+ else
+ {
+ if (offset_src) offset_src = sizeof(size_t) - offset_src;
+ while (n-- && offset_src--)
+ *d++ = *s++;
+ n++;
+ if (!n) return ret;
+ void **d1 = (void **)d;
+ void **s1 = (void **)s;
+ while (n >= sizeof(void *))
+ {
+ n -= sizeof(void *);
+ *d1++ = *s1++;
+ }
+ s = (char *)s1;
+ d = (char *)d1;
+ while (n--)
+ *d++ = *s++;
+ }
+ return ret;
+}
+
+weak_alias (mempcpy, __GI_mempcpy)
+weak_alias (mempcpy, __mempcpy)
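Unlike memcpy, mempcpy returns dst + n, so consecutive copies chain without recomputing the cursor (usage note, not part of the patch):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char buf[16];
  char *p = buf;
  p = mempcpy (p, "foo", 3);  /* returns the byte after the copy */
  p = mempcpy (p, "bar", 4);  /* include the terminating NUL */
  printf ("%s\n", buf);       /* prints "foobar" */
  return 0;
}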
--- /dev/null
+#include <debug/mempcpy_chk.c>
movd STR2(%esp), %xmm1
mov LEN(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu -1(%ecx, %edx), %bnd0
+# endif
+
sub $16, %edx
jbe L(length_less16)
ENTRANCE
movl LEN(%esp), %ecx
+
#ifdef USE_AS_BZERO
xor %eax, %eax
#else
or %edx, %eax
#endif
movl DEST(%esp), %edx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%edx,1),%bnd0
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
cmp $32, %ecx
jae L(32bytesormore)
--- /dev/null
+/* memcpy with SSSE3 and REP string, used for mpx_memcpy_nobnd.
+ Copyright (C) 2010-2013 Free Software Foundation, Inc.
+ Contributed by Intel Corporation.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+
+#include "asm-syntax.h"
+
+#ifndef MEMCPY
+# define MEMCPY mpx_memcpy_nobnd
+#endif
+
+#ifdef USE_AS_BCOPY
+# define SRC PARMS
+# define DEST SRC+4
+# define LEN DEST+4
+#else
+# define DEST PARMS
+# define SRC DEST+4
+# define LEN SRC+4
+#endif
+
+#define CFI_PUSH(REG) \
+ cfi_adjust_cfa_offset (4); \
+ cfi_rel_offset (REG, 0)
+
+#define CFI_POP(REG) \
+ cfi_adjust_cfa_offset (-4); \
+ cfi_restore (REG)
+
+#define PUSH(REG) pushl REG; CFI_PUSH (REG)
+#define POP(REG) popl REG; CFI_POP (REG)
+
+#ifdef SHARED
+# define PARMS 8 /* Preserve EBX. */
+# define ENTRANCE PUSH (%ebx);
+# define RETURN_END POP (%ebx); ret
+# define RETURN RETURN_END; CFI_PUSH (%ebx)
+# define JMPTBL(I, B) I - B
+
+/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
+   jump table with relative offsets.  INDEX is a register containing the
+   index into the jump table.  SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ /* We first load PC into EBX. */ \
+ SETUP_PIC_REG(bx); \
+ /* Get the address of the jump table. */ \
+ addl $(TABLE - .), %ebx; \
+ /* Get the entry and convert the relative offset to the \
+ absolute address. */ \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
+ addl $(TABLE - .), %ebx
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ addl (%ebx,INDEX,SCALE), %ebx; \
+ /* We loaded the jump table. Go. */ \
+ jmp *%ebx
+#else
+# define PARMS 4
+# define ENTRANCE
+# define RETURN_END ret
+# define RETURN RETURN_END
+# define JMPTBL(I, B) I
+
+/* Branch to an entry in a jump table.  TABLE is a jump table with
+   absolute offsets.  INDEX is a register containing the index into the
+   jump table.  SCALE is the scale of INDEX. */
+# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)
+
+# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
+ jmp *TABLE(,INDEX,SCALE)
+#endif
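The SHARED variant stores offsets relative to the table itself, so the table needs no load-time relocations in PIC code. A hedged C model using GNU C's labels-as-values extension (illustration only; the names are mine):

#include <stdio.h>

/* Like BRANCH_TO_JMPTBL_ENTRY: table entries are label offsets relative
   to a base label; adding the base's runtime address recovers the
   absolute branch target.  */
static void
dispatch (unsigned tail)
{
  static const int table[] = {
    &&bytes0 - &&bytes0, &&bytes1 - &&bytes0,
    &&bytes2 - &&bytes0, &&bytes3 - &&bytes0,
  };
  goto *(&&bytes0 + table[tail & 3]);  /* base + relative entry */
 bytes0: puts ("0 tail bytes"); return;
 bytes1: puts ("1 tail byte");  return;
 bytes2: puts ("2 tail bytes"); return;
 bytes3: puts ("3 tail bytes"); return;
}

int
main (void)
{
  dispatch (2);
  return 0;
}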
+
+ .section .text.ssse3,"ax",@progbits
+ENTRY (MEMCPY)
+ ENTRANCE
+ movl LEN(%esp), %ecx
+ movl SRC(%esp), %eax
+ movl DEST(%esp), %edx
+
+#ifdef __CHKP__
+ bndldx SRC(%esp,%eax,1), %bnd1
+ bndldx DEST(%esp,%edx,1), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu -1(%eax, %ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu -1(%edx, %ecx), %bnd0
+#endif
+
+#ifdef USE_AS_MEMMOVE
+ cmp %eax, %edx
+ jb L(copy_forward)
+ je L(fwd_write_0bytes)
+ cmp $48, %ecx
+ jb L(bk_write_less48bytes)
+ add %ecx, %eax
+ cmp %eax, %edx
+ movl SRC(%esp), %eax
+ jb L(copy_backward)
+
+L(copy_forward):
+#endif
+ cmp $48, %ecx
+ jae L(48bytesormore)
+
+L(fwd_write_less32bytes):
+#ifndef USE_AS_MEMMOVE
+ cmp %dl, %al
+ jb L(bk_write)
+#endif
+ add %ecx, %edx
+ add %ecx, %eax
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+#ifndef USE_AS_MEMMOVE
+L(bk_write):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+#endif
+
+ ALIGN (4)
+/* ECX > 32 and EDX is 4 byte aligned. */
+L(48bytesormore):
+ movdqu (%eax), %xmm0
+ PUSH (%edi)
+ movl %edx, %edi
+ and $-16, %edx
+ PUSH (%esi)
+ cfi_remember_state
+ add $16, %edx
+ movl %edi, %esi
+ sub %edx, %edi
+ add %edi, %ecx
+ sub %edi, %eax
+
+#ifdef SHARED_CACHE_SIZE_HALF
+ cmp $SHARED_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ cmp __x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
+# else
+ cmp __x86_shared_cache_size_half, %ecx
+# endif
+#endif
+
+ mov %eax, %edi
+ jae L(large_page)
+ and $0xf, %edi
+ jz L(shl_0)
+
+ BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)
+
+ ALIGN (4)
+L(shl_0):
+ movdqu %xmm0, (%esi)
+ xor %edi, %edi
+ cmp $127, %ecx
+ ja L(shl_0_gobble)
+ lea -32(%ecx), %ecx
+L(shl_0_loop):
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+ jb L(shl_0_end)
+
+ movdqa (%eax, %edi), %xmm0
+ movdqa 16(%eax, %edi), %xmm1
+ sub $32, %ecx
+ movdqa %xmm0, (%edx, %edi)
+ movdqa %xmm1, 16(%edx, %edi)
+ lea 32(%edi), %edi
+L(shl_0_end):
+ lea 32(%ecx), %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ add %edi, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+L(shl_0_gobble):
+
+#ifdef DATA_CACHE_SIZE_HALF
+ cmp $DATA_CACHE_SIZE_HALF, %ecx
+#else
+# ifdef SHARED
+ SETUP_PIC_REG(bx)
+ add $_GLOBAL_OFFSET_TABLE_, %ebx
+ mov __x86_data_cache_size_half@GOTOFF(%ebx), %edi
+# else
+ mov __x86_data_cache_size_half, %edi
+# endif
+#endif
+ mov %edi, %esi
+ shr $3, %esi
+ sub %esi, %edi
+ cmp %edi, %ecx
+ jae L(shl_0_gobble_mem_start)
+ sub $128, %ecx
+ ALIGN (4)
+L(shl_0_gobble_cache_loop):
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $128, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_cache_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_cache_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_cache_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_cache_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_cache_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_cache_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_cache_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_0_gobble_mem_start):
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+ sub $128, %ecx
+L(shl_0_gobble_mem_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ prefetchnta 0x1c0(%edx)
+ prefetchnta 0x280(%edx)
+
+ movdqa (%eax), %xmm0
+ movaps 0x10(%eax), %xmm1
+ movaps 0x20(%eax), %xmm2
+ movaps 0x30(%eax), %xmm3
+ movaps 0x40(%eax), %xmm4
+ movaps 0x50(%eax), %xmm5
+ movaps 0x60(%eax), %xmm6
+ movaps 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ sub $0x80, %ecx
+ movdqa %xmm0, (%edx)
+ movaps %xmm1, 0x10(%edx)
+ movaps %xmm2, 0x20(%edx)
+ movaps %xmm3, 0x30(%edx)
+ movaps %xmm4, 0x40(%edx)
+ movaps %xmm5, 0x50(%edx)
+ movaps %xmm6, 0x60(%edx)
+ movaps %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+
+ jae L(shl_0_gobble_mem_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(shl_0_mem_less_64bytes)
+
+ movdqa (%eax), %xmm0
+ sub $0x40, %ecx
+ movdqa 0x10(%eax), %xmm1
+
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+
+ movdqa 0x20(%eax), %xmm0
+ movdqa 0x30(%eax), %xmm1
+ add $0x40, %eax
+
+ movdqa %xmm0, 0x20(%edx)
+ movdqa %xmm1, 0x30(%edx)
+ add $0x40, %edx
+L(shl_0_mem_less_64bytes):
+ cmp $0x20, %ecx
+ jb L(shl_0_mem_less_32bytes)
+ movdqa (%eax), %xmm0
+ sub $0x20, %ecx
+ movdqa 0x10(%eax), %xmm1
+ add $0x20, %eax
+ movdqa %xmm0, (%edx)
+ movdqa %xmm1, 0x10(%edx)
+ add $0x20, %edx
+L(shl_0_mem_less_32bytes):
+ cmp $0x10, %ecx
+ jb L(shl_0_mem_less_16bytes)
+ sub $0x10, %ecx
+ movdqa (%eax), %xmm0
+ add $0x10, %eax
+ movdqa %xmm0, (%edx)
+ add $0x10, %edx
+L(shl_0_mem_less_16bytes):
+ add %ecx, %edx
+ add %ecx, %eax
+ POP (%esi)
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_1):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $1, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_1_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_1_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $1, %xmm2, %xmm3
+ palignr $1, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_1_loop)
+
+L(shl_1_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 1(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_2):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $2, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_2_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_2_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $2, %xmm2, %xmm3
+ palignr $2, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_2_loop)
+
+L(shl_2_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 2(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_3):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $3, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_3_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_3_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $3, %xmm2, %xmm3
+ palignr $3, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_3_loop)
+
+L(shl_3_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 3(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_4):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $4, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_4_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_4_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $4, %xmm2, %xmm3
+ palignr $4, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_4_loop)
+
+L(shl_4_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 4(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_5):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $5, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_5_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_5_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $5, %xmm2, %xmm3
+ palignr $5, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_5_loop)
+
+L(shl_5_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 5(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_6):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $6, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_6_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_6_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $6, %xmm2, %xmm3
+ palignr $6, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_6_loop)
+
+L(shl_6_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 6(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_7):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $7, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_7_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_7_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $7, %xmm2, %xmm3
+ palignr $7, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_7_loop)
+
+L(shl_7_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 7(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_8):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $8, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_8_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_8_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $8, %xmm2, %xmm3
+ palignr $8, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_8_loop)
+
+L(shl_8_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 8(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_9):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $9, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_9_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_9_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $9, %xmm2, %xmm3
+ palignr $9, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_9_loop)
+
+L(shl_9_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 9(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_10):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $10, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_10_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_10_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $10, %xmm2, %xmm3
+ palignr $10, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_10_loop)
+
+L(shl_10_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 10(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_11):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $11, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_11_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_11_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $11, %xmm2, %xmm3
+ palignr $11, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_11_loop)
+
+L(shl_11_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 11(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_12):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $12, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_12_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_12_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $12, %xmm2, %xmm3
+ palignr $12, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_12_loop)
+
+L(shl_12_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 12(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_13):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $13, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_13_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_13_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $13, %xmm2, %xmm3
+ palignr $13, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_13_loop)
+
+L(shl_13_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 13(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_14):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $14, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_14_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_14_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $14, %xmm2, %xmm3
+ palignr $14, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_14_loop)
+
+L(shl_14_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 14(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(shl_15):
+ BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
+ sub $15, %eax
+ movaps (%eax), %xmm1
+ xor %edi, %edi
+ sub $32, %ecx
+ movdqu %xmm0, (%esi)
+ POP (%esi)
+L(shl_15_loop):
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm4
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm1, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jb L(shl_15_end)
+
+ movdqa 16(%eax, %edi), %xmm2
+ sub $32, %ecx
+ movdqa 32(%eax, %edi), %xmm3
+ movdqa %xmm3, %xmm1
+ palignr $15, %xmm2, %xmm3
+ palignr $15, %xmm4, %xmm2
+ lea 32(%edi), %edi
+ movdqa %xmm2, -32(%edx, %edi)
+ movdqa %xmm3, -16(%edx, %edi)
+
+ jae L(shl_15_loop)
+
+L(shl_15_end):
+ add $32, %ecx
+ add %ecx, %edi
+ add %edi, %edx
+ lea 15(%edi, %eax), %eax
+ POP (%edi)
+ BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)
+
+
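Each L(shl_N) block above handles one relative source misalignment (1..15): it reads the source only through 16-byte-aligned loads and splices each output vector from two neighbors with palignr $N. A scalar C model of the same idea (illustration only; assumes little-endian and one word of readable slack past the source, a guarantee the real loop structure provides by other means):

#include <stdint.h>
#include <string.h>

static void
copy_from_misaligned (uint8_t *dst, const uint8_t *src, size_t nwords)
{
  size_t shift = (uintptr_t) src % sizeof (uint32_t);  /* the N in L(shl_N) */
  if (shift == 0)
    {
      memcpy (dst, src, nwords * sizeof (uint32_t));   /* L(shl_0): no splicing */
      return;
    }
  const uint32_t *s = (const uint32_t *) (src - shift); /* aligned base */
  uint32_t prev = s[0];
  for (size_t i = 0; i < nwords; i++)
    {
      uint32_t next = s[i + 1];
      /* Splice bytes shift..3 of PREV with bytes 0..shift-1 of NEXT,
         the word-sized analogue of palignr.  */
      uint32_t w = (prev >> (8 * shift))
                   | (next << (8 * (sizeof (uint32_t) - shift)));
      memcpy (dst + i * sizeof (uint32_t), &w, sizeof w);
      prev = next;
    }
}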
+ ALIGN (4)
+L(fwd_write_44bytes):
+ movl -44(%eax), %ecx
+ movl %ecx, -44(%edx)
+L(fwd_write_40bytes):
+ movl -40(%eax), %ecx
+ movl %ecx, -40(%edx)
+L(fwd_write_36bytes):
+ movl -36(%eax), %ecx
+ movl %ecx, -36(%edx)
+L(fwd_write_32bytes):
+ movl -32(%eax), %ecx
+ movl %ecx, -32(%edx)
+L(fwd_write_28bytes):
+ movl -28(%eax), %ecx
+ movl %ecx, -28(%edx)
+L(fwd_write_24bytes):
+ movl -24(%eax), %ecx
+ movl %ecx, -24(%edx)
+L(fwd_write_20bytes):
+ movl -20(%eax), %ecx
+ movl %ecx, -20(%edx)
+L(fwd_write_16bytes):
+ movl -16(%eax), %ecx
+ movl %ecx, -16(%edx)
+L(fwd_write_12bytes):
+ movl -12(%eax), %ecx
+ movl %ecx, -12(%edx)
+L(fwd_write_8bytes):
+ movl -8(%eax), %ecx
+ movl %ecx, -8(%edx)
+L(fwd_write_4bytes):
+ movl -4(%eax), %ecx
+ movl %ecx, -4(%edx)
+L(fwd_write_0bytes):
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_5bytes):
+ movl -5(%eax), %ecx
+ movl -4(%eax), %eax
+ movl %ecx, -5(%edx)
+ movl %eax, -4(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_45bytes):
+ movl -45(%eax), %ecx
+ movl %ecx, -45(%edx)
+L(fwd_write_41bytes):
+ movl -41(%eax), %ecx
+ movl %ecx, -41(%edx)
+L(fwd_write_37bytes):
+ movl -37(%eax), %ecx
+ movl %ecx, -37(%edx)
+L(fwd_write_33bytes):
+ movl -33(%eax), %ecx
+ movl %ecx, -33(%edx)
+L(fwd_write_29bytes):
+ movl -29(%eax), %ecx
+ movl %ecx, -29(%edx)
+L(fwd_write_25bytes):
+ movl -25(%eax), %ecx
+ movl %ecx, -25(%edx)
+L(fwd_write_21bytes):
+ movl -21(%eax), %ecx
+ movl %ecx, -21(%edx)
+L(fwd_write_17bytes):
+ movl -17(%eax), %ecx
+ movl %ecx, -17(%edx)
+L(fwd_write_13bytes):
+ movl -13(%eax), %ecx
+ movl %ecx, -13(%edx)
+L(fwd_write_9bytes):
+ movl -9(%eax), %ecx
+ movl %ecx, -9(%edx)
+ movl -5(%eax), %ecx
+ movl %ecx, -5(%edx)
+L(fwd_write_1bytes):
+ movzbl -1(%eax), %ecx
+ movb %cl, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_46bytes):
+ movl -46(%eax), %ecx
+ movl %ecx, -46(%edx)
+L(fwd_write_42bytes):
+ movl -42(%eax), %ecx
+ movl %ecx, -42(%edx)
+L(fwd_write_38bytes):
+ movl -38(%eax), %ecx
+ movl %ecx, -38(%edx)
+L(fwd_write_34bytes):
+ movl -34(%eax), %ecx
+ movl %ecx, -34(%edx)
+L(fwd_write_30bytes):
+ movl -30(%eax), %ecx
+ movl %ecx, -30(%edx)
+L(fwd_write_26bytes):
+ movl -26(%eax), %ecx
+ movl %ecx, -26(%edx)
+L(fwd_write_22bytes):
+ movl -22(%eax), %ecx
+ movl %ecx, -22(%edx)
+L(fwd_write_18bytes):
+ movl -18(%eax), %ecx
+ movl %ecx, -18(%edx)
+L(fwd_write_14bytes):
+ movl -14(%eax), %ecx
+ movl %ecx, -14(%edx)
+L(fwd_write_10bytes):
+ movl -10(%eax), %ecx
+ movl %ecx, -10(%edx)
+L(fwd_write_6bytes):
+ movl -6(%eax), %ecx
+ movl %ecx, -6(%edx)
+L(fwd_write_2bytes):
+ movzwl -2(%eax), %ecx
+ movw %cx, -2(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(fwd_write_47bytes):
+ movl -47(%eax), %ecx
+ movl %ecx, -47(%edx)
+L(fwd_write_43bytes):
+ movl -43(%eax), %ecx
+ movl %ecx, -43(%edx)
+L(fwd_write_39bytes):
+ movl -39(%eax), %ecx
+ movl %ecx, -39(%edx)
+L(fwd_write_35bytes):
+ movl -35(%eax), %ecx
+ movl %ecx, -35(%edx)
+L(fwd_write_31bytes):
+ movl -31(%eax), %ecx
+ movl %ecx, -31(%edx)
+L(fwd_write_27bytes):
+ movl -27(%eax), %ecx
+ movl %ecx, -27(%edx)
+L(fwd_write_23bytes):
+ movl -23(%eax), %ecx
+ movl %ecx, -23(%edx)
+L(fwd_write_19bytes):
+ movl -19(%eax), %ecx
+ movl %ecx, -19(%edx)
+L(fwd_write_15bytes):
+ movl -15(%eax), %ecx
+ movl %ecx, -15(%edx)
+L(fwd_write_11bytes):
+ movl -11(%eax), %ecx
+ movl %ecx, -11(%edx)
+L(fwd_write_7bytes):
+ movl -7(%eax), %ecx
+ movl %ecx, -7(%edx)
+L(fwd_write_3bytes):
+ movzwl -3(%eax), %ecx
+ movzbl -1(%eax), %eax
+ movw %cx, -3(%edx)
+ movb %al, -1(%edx)
+#ifndef USE_AS_BCOPY
+# ifdef USE_AS_MEMPCPY
+ movl %edx, %eax
+# else
+ movl DEST(%esp), %eax
+# endif
+#endif
+ RETURN_END
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
+L(large_page):
+ movdqu (%eax), %xmm1
+ movdqu %xmm0, (%esi)
+ movntdq %xmm1, (%edx)
+ add $0x10, %eax
+ add $0x10, %edx
+ sub $0x10, %ecx
+ cmp %al, %dl
+ je L(copy_page_by_rep)
+L(large_page_loop_init):
+ POP (%esi)
+ sub $0x80, %ecx
+ POP (%edi)
+L(large_page_loop):
+ prefetchnta 0x1c0(%eax)
+ prefetchnta 0x280(%eax)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ movdqu 0x40(%eax), %xmm4
+ movdqu 0x50(%eax), %xmm5
+ movdqu 0x60(%eax), %xmm6
+ movdqu 0x70(%eax), %xmm7
+ lea 0x80(%eax), %eax
+ lfence
+ sub $0x80, %ecx
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ movntdq %xmm4, 0x40(%edx)
+ movntdq %xmm5, 0x50(%edx)
+ movntdq %xmm6, 0x60(%edx)
+ movntdq %xmm7, 0x70(%edx)
+ lea 0x80(%edx), %edx
+ jae L(large_page_loop)
+ add $0x80, %ecx
+ cmp $0x40, %ecx
+ jb L(large_page_less_64bytes)
+
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ movdqu 0x20(%eax), %xmm2
+ movdqu 0x30(%eax), %xmm3
+ lea 0x40(%eax), %eax
+
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ movntdq %xmm2, 0x20(%edx)
+ movntdq %xmm3, 0x30(%edx)
+ lea 0x40(%edx), %edx
+ sub $0x40, %ecx
+L(large_page_less_64bytes):
+ cmp $32, %ecx
+ jb L(large_page_less_32bytes)
+ movdqu (%eax), %xmm0
+ movdqu 0x10(%eax), %xmm1
+ lea 0x20(%eax), %eax
+ movntdq %xmm0, (%edx)
+ movntdq %xmm1, 0x10(%edx)
+ lea 0x20(%edx), %edx
+ sub $0x20, %ecx
+L(large_page_less_32bytes):
+ add %ecx, %edx
+ add %ecx, %eax
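+ /* Non-temporal stores are weakly ordered; drain them before the
+    jump-table tail copy below uses ordinary stores. */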
+ sfence
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
+
+ cfi_restore_state
+ cfi_remember_state
+ ALIGN (4)
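+/* Source and destination share alignment here, so a plain string
+   copy is fastest: copy %ecx / 4 dwords with rep movsl, then finish
+   the remaining 0-3 bytes by hand. */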
+L(copy_page_by_rep):
+ mov %eax, %esi
+ mov %edx, %edi
+ mov %ecx, %edx
+ shr $2, %ecx
+ and $3, %edx
+ rep movsl
+ jz L(copy_page_by_rep_exit)
+ cmp $2, %edx
+ jb L(copy_page_by_rep_left_1)
+ movzwl (%esi), %eax
+ movw %ax, (%edi)
+ add $2, %esi
+ add $2, %edi
+ sub $2, %edx
+ jz L(copy_page_by_rep_exit)
+L(copy_page_by_rep_left_1):
+ movzbl (%esi), %eax
+ movb %al, (%edi)
+L(copy_page_by_rep_exit):
+ POP (%esi)
+ POP (%edi)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_44bytes):
+ movl 40(%eax), %ecx
+ movl %ecx, 40(%edx)
+L(bk_write_40bytes):
+ movl 36(%eax), %ecx
+ movl %ecx, 36(%edx)
+L(bk_write_36bytes):
+ movl 32(%eax), %ecx
+ movl %ecx, 32(%edx)
+L(bk_write_32bytes):
+ movl 28(%eax), %ecx
+ movl %ecx, 28(%edx)
+L(bk_write_28bytes):
+ movl 24(%eax), %ecx
+ movl %ecx, 24(%edx)
+L(bk_write_24bytes):
+ movl 20(%eax), %ecx
+ movl %ecx, 20(%edx)
+L(bk_write_20bytes):
+ movl 16(%eax), %ecx
+ movl %ecx, 16(%edx)
+L(bk_write_16bytes):
+ movl 12(%eax), %ecx
+ movl %ecx, 12(%edx)
+L(bk_write_12bytes):
+ movl 8(%eax), %ecx
+ movl %ecx, 8(%edx)
+L(bk_write_8bytes):
+ movl 4(%eax), %ecx
+ movl %ecx, 4(%edx)
+L(bk_write_4bytes):
+ movl (%eax), %ecx
+ movl %ecx, (%edx)
+L(bk_write_0bytes):
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_45bytes):
+ movl 41(%eax), %ecx
+ movl %ecx, 41(%edx)
+L(bk_write_41bytes):
+ movl 37(%eax), %ecx
+ movl %ecx, 37(%edx)
+L(bk_write_37bytes):
+ movl 33(%eax), %ecx
+ movl %ecx, 33(%edx)
+L(bk_write_33bytes):
+ movl 29(%eax), %ecx
+ movl %ecx, 29(%edx)
+L(bk_write_29bytes):
+ movl 25(%eax), %ecx
+ movl %ecx, 25(%edx)
+L(bk_write_25bytes):
+ movl 21(%eax), %ecx
+ movl %ecx, 21(%edx)
+L(bk_write_21bytes):
+ movl 17(%eax), %ecx
+ movl %ecx, 17(%edx)
+L(bk_write_17bytes):
+ movl 13(%eax), %ecx
+ movl %ecx, 13(%edx)
+L(bk_write_13bytes):
+ movl 9(%eax), %ecx
+ movl %ecx, 9(%edx)
+L(bk_write_9bytes):
+ movl 5(%eax), %ecx
+ movl %ecx, 5(%edx)
+L(bk_write_5bytes):
+ movl 1(%eax), %ecx
+ movl %ecx, 1(%edx)
+L(bk_write_1bytes):
+ movzbl (%eax), %ecx
+ movb %cl, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_46bytes):
+ movl 42(%eax), %ecx
+ movl %ecx, 42(%edx)
+L(bk_write_42bytes):
+ movl 38(%eax), %ecx
+ movl %ecx, 38(%edx)
+L(bk_write_38bytes):
+ movl 34(%eax), %ecx
+ movl %ecx, 34(%edx)
+L(bk_write_34bytes):
+ movl 30(%eax), %ecx
+ movl %ecx, 30(%edx)
+L(bk_write_30bytes):
+ movl 26(%eax), %ecx
+ movl %ecx, 26(%edx)
+L(bk_write_26bytes):
+ movl 22(%eax), %ecx
+ movl %ecx, 22(%edx)
+L(bk_write_22bytes):
+ movl 18(%eax), %ecx
+ movl %ecx, 18(%edx)
+L(bk_write_18bytes):
+ movl 14(%eax), %ecx
+ movl %ecx, 14(%edx)
+L(bk_write_14bytes):
+ movl 10(%eax), %ecx
+ movl %ecx, 10(%edx)
+L(bk_write_10bytes):
+ movl 6(%eax), %ecx
+ movl %ecx, 6(%edx)
+L(bk_write_6bytes):
+ movl 2(%eax), %ecx
+ movl %ecx, 2(%edx)
+L(bk_write_2bytes):
+ movzwl (%eax), %ecx
+ movw %cx, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN
+
+ ALIGN (4)
+L(bk_write_47bytes):
+ movl 43(%eax), %ecx
+ movl %ecx, 43(%edx)
+L(bk_write_43bytes):
+ movl 39(%eax), %ecx
+ movl %ecx, 39(%edx)
+L(bk_write_39bytes):
+ movl 35(%eax), %ecx
+ movl %ecx, 35(%edx)
+L(bk_write_35bytes):
+ movl 31(%eax), %ecx
+ movl %ecx, 31(%edx)
+L(bk_write_31bytes):
+ movl 27(%eax), %ecx
+ movl %ecx, 27(%edx)
+L(bk_write_27bytes):
+ movl 23(%eax), %ecx
+ movl %ecx, 23(%edx)
+L(bk_write_23bytes):
+ movl 19(%eax), %ecx
+ movl %ecx, 19(%edx)
+L(bk_write_19bytes):
+ movl 15(%eax), %ecx
+ movl %ecx, 15(%edx)
+L(bk_write_15bytes):
+ movl 11(%eax), %ecx
+ movl %ecx, 11(%edx)
+L(bk_write_11bytes):
+ movl 7(%eax), %ecx
+ movl %ecx, 7(%edx)
+L(bk_write_7bytes):
+ movl 3(%eax), %ecx
+ movl %ecx, 3(%edx)
+L(bk_write_3bytes):
+ movzwl 1(%eax), %ecx
+ movw %cx, 1(%edx)
+ movzbl (%eax), %eax
+ movb %al, (%edx)
+#ifndef USE_AS_BCOPY
+ movl DEST(%esp), %eax
+# ifdef USE_AS_MEMPCPY
+ movl LEN(%esp), %ecx
+ add %ecx, %eax
+# endif
+#endif
+ RETURN_END
+
+
+ .pushsection .rodata.ssse3,"a",@progbits
+ ALIGN (2)
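+/* Dispatch tables: each JMPTBL entry resolves to its target label
+   (as a table-relative offset in PIC builds), and
+   BRANCH_TO_JMPTBL_ENTRY indexes the table by the remaining byte
+   count to select the exit path. */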
+L(table_48bytes_fwd):
+ .int JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
+ .int JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))
+
+ ALIGN (2)
+L(shl_table):
+ .int JMPTBL (L(shl_0), L(shl_table))
+ .int JMPTBL (L(shl_1), L(shl_table))
+ .int JMPTBL (L(shl_2), L(shl_table))
+ .int JMPTBL (L(shl_3), L(shl_table))
+ .int JMPTBL (L(shl_4), L(shl_table))
+ .int JMPTBL (L(shl_5), L(shl_table))
+ .int JMPTBL (L(shl_6), L(shl_table))
+ .int JMPTBL (L(shl_7), L(shl_table))
+ .int JMPTBL (L(shl_8), L(shl_table))
+ .int JMPTBL (L(shl_9), L(shl_table))
+ .int JMPTBL (L(shl_10), L(shl_table))
+ .int JMPTBL (L(shl_11), L(shl_table))
+ .int JMPTBL (L(shl_12), L(shl_table))
+ .int JMPTBL (L(shl_13), L(shl_table))
+ .int JMPTBL (L(shl_14), L(shl_table))
+ .int JMPTBL (L(shl_15), L(shl_table))
+
+ ALIGN (2)
+L(table_48_bytes_bwd):
+ .int JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
+ .int JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))
+
+ .popsection
+
+#ifdef USE_AS_MEMMOVE
+ ALIGN (4)
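+/* Overlap-safe path for memmove: point both registers at the end of
+   the buffers and copy downwards, so source bytes are read before
+   they can be overwritten. */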
+L(copy_backward):
+ PUSH (%esi)
+ movl %eax, %esi
+ add %ecx, %edx
+ add %ecx, %esi
+ testl $0x3, %edx
+ jnz L(bk_align)
+
+L(bk_aligned_4):
+ cmp $64, %ecx
+ jae L(bk_write_more64bytes)
+
+L(bk_write_64bytesless):
+ cmp $32, %ecx
+ jb L(bk_write_less32bytes)
+
+L(bk_write_more32bytes):
+ /* Copy 32 bytes at a time. */
+ sub $32, %ecx
+ movl -4(%esi), %eax
+ movl %eax, -4(%edx)
+ movl -8(%esi), %eax
+ movl %eax, -8(%edx)
+ movl -12(%esi), %eax
+ movl %eax, -12(%edx)
+ movl -16(%esi), %eax
+ movl %eax, -16(%edx)
+ movl -20(%esi), %eax
+ movl %eax, -20(%edx)
+ movl -24(%esi), %eax
+ movl %eax, -24(%edx)
+ movl -28(%esi), %eax
+ movl %eax, -28(%edx)
+ movl -32(%esi), %eax
+ movl %eax, -32(%edx)
+ sub $32, %edx
+ sub $32, %esi
+
+L(bk_write_less32bytes):
+ movl %esi, %eax
+ sub %ecx, %edx
+ sub %ecx, %eax
+ POP (%esi)
+L(bk_write_less48bytes):
+ BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
+
+ CFI_PUSH (%esi)
+ ALIGN (4)
+L(bk_align):
+ cmp $8, %ecx
+ jbe L(bk_write_less32bytes)
+ testl $1, %edx
+ /* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
+ then (EDX & 2) must be != 0. */
+ jz L(bk_got2)
+ sub $1, %esi
+ sub $1, %ecx
+ sub $1, %edx
+ movzbl (%esi), %eax
+ movb %al, (%edx)
+
+ testl $2, %edx
+ jz L(bk_aligned_4)
+
+L(bk_got2):
+ sub $2, %esi
+ sub $2, %ecx
+ sub $2, %edx
+ movzwl (%esi), %eax
+ movw %ax, (%edx)
+ jmp L(bk_aligned_4)
+
+ ALIGN (4)
+L(bk_write_more64bytes):
+ /* Check alignment of last byte. */
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+/* EDX is 4-byte aligned, but not 16-byte aligned. */
+L(bk_ssse3_align):
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+ testl $15, %edx
+ jz L(bk_ssse3_cpy_pre)
+
+ sub $4, %esi
+ sub $4, %ecx
+ sub $4, %edx
+ movl (%esi), %eax
+ movl %eax, (%edx)
+
+L(bk_ssse3_cpy_pre):
+ cmp $64, %ecx
+ jb L(bk_write_more32bytes)
+
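+/* Backward SSSE3 loop: 64 bytes per iteration, unaligned loads from
+   the source and aligned 16-byte stores to the now 16-byte-aligned
+   destination. */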
+L(bk_ssse3_cpy):
+ sub $64, %esi
+ sub $64, %ecx
+ sub $64, %edx
+ movdqu 0x30(%esi), %xmm3
+ movdqa %xmm3, 0x30(%edx)
+ movdqu 0x20(%esi), %xmm2
+ movdqa %xmm2, 0x20(%edx)
+ movdqu 0x10(%esi), %xmm1
+ movdqa %xmm1, 0x10(%edx)
+ movdqu (%esi), %xmm0
+ movdqa %xmm0, (%edx)
+ cmp $64, %ecx
+ jae L(bk_ssse3_cpy)
+ jmp L(bk_write_64bytesless)
+
+#endif
+
+END (MEMCPY)
--- /dev/null
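+/* memmove is generated from the memcpy template; USE_AS_MEMMOVE
+   enables the backward-copy path for overlapping buffers. */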
+#define USE_AS_MEMMOVE
+#define MEMCPY mpx_memmove_nobnd
+#include "mpx_memcpy_nobnd.S"
--- /dev/null
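+/* mempcpy reuses the same template and differs only in returning
+   DEST + LEN instead of DEST. */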
+#define USE_AS_MEMPCPY
+#define MEMCPY mpx_mempcpy_nobnd
+#include "mpx_memcpy_nobnd.S"
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitZero)
+# endif
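+/* MPX checking pattern used below: bndldx reloads the bounds
+   associated with a pointer from the bound table, bndcl traps if an
+   address is below the lower bound, and bndcu traps if the last
+   byte touched lies above the upper bound. */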
+# ifdef __CHKP__
+ bndldx STR1(%esp,%eax,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
# endif
cmpb $0, (%esi)
mov %esi, %ecx
mov %eax, %edx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
and $63, %ecx
and $63, %edx
movdqu (%eax), %xmm1
movdqu (%esi), %xmm5
pcmpeqb %xmm1, %xmm0
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %ecx
pcmpeqb %xmm5, %xmm4
and $-16, %eax
pcmpeqb (%eax), %xmm0
movdqu (%esi), %xmm5
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
movdqu 16(%esi), %xmm6
pmovmskb %xmm0, %edx
pcmpeqb %xmm5, %xmm4
pxor %xmm3, %xmm3
.p2align 4
L(align16_loop):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm5, (%eax)
pmovmskb %xmm7, %edx
# ifdef USE_AS_STRNCAT
.p2align 4
L(align16_loop_1):
+# ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+# endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16_1)
+# ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+# endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32_1)
+# ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+# endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48_1)
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STRNCAT
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
L(Unalign16BothBigN):
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%eax, %ecx)
pcmpeqb %xmm4, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%eax, %ecx)
pcmpeqb %xmm1, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%eax, %ecx)
pcmpeqb %xmm3, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm3, (%eax, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
sub %edx, %eax
# ifdef USE_AS_STRNCAT
lea 128(%ebx, %edx), %ebx
+# endif
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
L(Unaligned64Loop_start):
add $64, %eax
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu -1(%eax), %bnd0
+# endif
movdqu %xmm4, -64(%eax)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
movdqu %xmm6, 32(%eax)
add $48, %esi
add $48, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm1, (%eax)
sub %ecx, %eax
sub $48, %ebx
mov $16, %ecx
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
+# ifdef __CHKP__
+ bndcu 15(%eax, %ecx), %bnd0
+# endif
movdqu %xmm1, (%eax, %ecx)
pcmpeqb %xmm2, %xmm0
pmovmskb %xmm0, %edx
add %ecx, %eax
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesTail):
add %ecx, %esi
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
add $16, %eax
L(CopyFrom1To16BytesTail1):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
add %ecx, %esi
add $16, %edx
sub %ecx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_0):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_16):
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %esi
add $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To16BytesUnaligned_32):
bsf %edx, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
movdqu %xmm5, 16(%eax)
add $32, %esi
add $32, %eax
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
# ifdef USE_AS_STRNCAT
.p2align 4
L(CopyFrom1To16BytesExit):
+# ifdef __CHKP__
+ bndcu (%eax, %edx), %bnd0
+ bndcu (%esi, %edx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
/* Case2 */
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
sub %ecx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTailCase2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
L(CopyFrom1To16BytesTail1Case2):
bsf %edx, %edx
cmp %ebx, %edx
jb L(CopyFrom1To16BytesExit)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
/* Case2 or Case3, Case3 */
add $16, %ebx
add %ecx, %eax
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
jnz L(CopyFrom1To32BytesCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
jnz L(CopyFrom1To16BytesTailCase2)
sub %ecx, %ebx
add %ecx, %esi
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
.p2align 4
L(CopyFrom1To16BytesTail1Case2OrCase3):
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1Case2)
+# ifdef __CHKP__
+ bndcu 1(%eax, %ebx), %bnd0
+ bndcu 1(%esi, %ebx), %bnd1
+# endif
BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncatTable), %ebx, 4)
# endif
and $-16, %ecx
add $48, %ebx
jl L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
sub $16, %ebx
jb L(CopyFrom1To16BytesCase3)
+# ifdef __CHKP__
+ bndcu 63(%eax), %bnd0
+# endif
movdqu %xmm7, 48(%eax)
xor %bh, %bh
+# ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+# endif
movb %bh, 64(%eax)
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 15(%eax), %bnd0
+# endif
movdqu %xmm4, (%eax)
add $16, %ecx
sub $16, %ebx
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 31(%eax), %bnd0
+# endif
movdqu %xmm5, 16(%eax)
add $16, %ecx
sub $16, %ebx
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %edx
+# ifdef __CHKP__
+ bndcu 47(%eax), %bnd0
+# endif
movdqu %xmm6, 32(%eax)
lea 16(%eax, %ecx), %eax
lea 16(%esi, %ecx), %esi
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
pxor %xmm2, %xmm2
mov %ecx, %edi
punpcklbw %xmm1, %xmm1
L(unaligned_match):
add %edi, %eax
add %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
.p2align 4
/* Loop start on aligned string. */
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %edx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
L(match):
sub $16, %edi
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
RETURN
/* Return NULL. */
movl CNT(%esp), REM
test REM, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
#endif
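+/* Two bound registers are in play: %bnd0 tracks the first string and
+   %bnd1 the second. The lower bounds are checked once here; the
+   upper bounds are re-checked at each offset read below. */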
mov %dx, %cx
and $0xfff, %cx
add $16, %edx
add $16, %eax
L(first4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
+#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 1(%edx), %edi
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $2, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $3, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $4, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $5, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $6, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $7, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
testl %edx, %edx
jg L(crosspage)
L(loop):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movdqu (%esi,%edx), %xmm2
movdqu (%edi,%edx), %xmm1
TOLOWER (%xmm2, %xmm1)
add $16, %edx
jle L(loop)
L(crosspage):
+#ifdef __CHKP__
+ bndcu (%edi,%edx), %bnd0
+ bndcu (%esi,%edx), %bnd1
+#endif
movzbl (%edi,%edx), %eax
movzbl (%esi,%edx), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
add $8, %eax
L(less4bytes):
+#ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
movzbl (%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl (%edx), %edi
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $1, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%eax), %bnd1
#endif
movzbl 1(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
je L(eq)
#endif
+#ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%eax), %bnd1
+#endif
movzbl 2(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movzbl 2(%edx), %edi
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $3, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%eax), %bnd1
#endif
movzbl 3(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $4, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%eax), %bnd1
#endif
movzbl 4(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $5, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%eax), %bnd1
#endif
movzbl 5(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $6, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%eax), %bnd1
#endif
movzbl 6(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
cmp $7, REM
je L(eq)
+#endif
+#ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%eax), %bnd1
#endif
movzbl 7(%eax), %ecx
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
movl LEN(%esp), %ebx
test %ebx, %ebx
jz L(ExitZero)
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edi,1), %bnd0
+ bndldx STR2(%esp,%esi,1), %bnd1
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+ bndcl (%edi), %bnd0
+ bndcu -1(%edi, %ebx), %bnd0
+# endif
mov %esi, %ecx
# ifndef USE_AS_STPCPY
test %edx, %edx
jnz L(CopyFrom1To16BytesTail)
+# ifdef __CHKP__
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
pmovmskb %xmm0, %edx
# ifdef USE_AS_STPCPY
jnz L(CopyFrom1To32Bytes)
movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm1, (%edi)
sub %ecx, %edi
.p2align 4
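+/* Source and destination are mutually misaligned: use aligned
+   16-byte loads from the source and unaligned stores to the
+   destination, testing each block for NUL before it is written. */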
L(Unalign16Both):
mov $16, %ecx
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqa (%esi, %ecx), %xmm1
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm4
movdqu %xmm3, (%edi, %ecx)
pcmpeqb %xmm4, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm4)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm1
movdqu %xmm4, (%edi, %ecx)
pcmpeqb %xmm1, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm1)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm2
movdqu %xmm1, (%edi, %ecx)
pcmpeqb %xmm2, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm2)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movaps 16(%esi, %ecx), %xmm3
movdqu %xmm2, (%edi, %ecx)
pcmpeqb %xmm3, %xmm0
test %edx, %edx
jnz L(CopyFrom1To16BytesUnalignedXmm3)
+# ifdef __CHKP__
+ bndcu 16(%esi, %ecx), %bnd1
+ bndcu 15(%edi, %ecx), %bnd0
+# endif
movdqu %xmm3, (%edi, %ecx)
mov %esi, %edx
lea 16(%esi, %ecx), %esi
lea 128(%ebx, %edx), %ebx
L(Unaligned64Loop):
+# ifdef __CHKP__
+ bndcu 48(%esi), %bnd1
+# endif
movaps (%esi), %xmm2
movaps %xmm2, %xmm4
movaps 16(%esi), %xmm5
L(Unaligned64Loop_start):
add $64, %edi
add $64, %esi
+# ifdef __CHKP__
+ bndcu (%esi), %bnd1
+ bndcu (%edi), %bnd0
+# endif
movdqu %xmm4, -64(%edi)
movaps (%esi), %xmm2
movdqa %xmm2, %xmm4
jnz L(CopyFrom1To16BytesUnaligned_32)
bsf %ecx, %edx
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
+# endif
movdqu %xmm4, (%edi)
movdqu %xmm5, 16(%edi)
movdqu %xmm6, 32(%edi)
# ifdef USE_AS_STPCPY
+# ifdef __CHKP__
+ bndcu 48(%edi, %edx), %bnd0
+# endif
lea 48(%edi, %edx), %eax
+# endif
+# ifdef __CHKP__
+ bndcu 63(%edi), %bnd0
# endif
movdqu %xmm7, 48(%edi)
add $15, %ebx
sub %edx, %ebx
+# ifdef __CHKP__
+ bndcu 49(%edi, %edx), %bnd0
+# endif
lea 49(%edi, %edx), %edi
jmp L(StrncpyFillTailWithZero)
test %edx, %edx
jnz L(CopyFrom1To16BytesTail1)
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+ bndcu 16(%esi), %bnd1
+# endif
pcmpeqb 16(%esi), %xmm0
movdqu %xmm1, (%edi)
pmovmskb %xmm0, %edx
bsf %edx, %edx
BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4)
.p2align 4
L(CopyFrom1To32Bytes1):
add $16, %esi
add $16, %edi
bsf %edx, %edx
# ifdef USE_AS_STPCPY
lea (%edi, %edx), %eax
+# endif
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
# endif
movdqu %xmm4, (%edi)
add $63, %ebx
movdqu %xmm4, (%edi)
# ifdef USE_AS_STPCPY
lea 16(%edi, %edx), %eax
+# endif
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
# endif
movdqu %xmm5, 16(%edi)
add $47, %ebx
movdqu %xmm5, 16(%edi)
# ifdef USE_AS_STPCPY
lea 32(%edi, %edx), %eax
+# endif
+# ifdef __CHKP__
+ bndcu 47(%edi), %bnd0
# endif
movdqu %xmm6, 32(%edi)
add $31, %ebx
.p2align 4
L(Exit2):
movw (%esi), %dx
+# ifdef __CHKP__
+ bndcu 1(%edi), %bnd0
+# endif
movw %dx, (%edi)
# ifdef USE_AS_STPCPY
lea 1(%edi), %eax
.p2align 4
L(Exit3):
movw (%esi), %cx
+# ifdef __CHKP__
+ bndcu 2(%edi), %bnd0
+# endif
movw %cx, (%edi)
movb %dh, 2(%edi)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit4):
movl (%esi), %edx
+# ifdef __CHKP__
+ bndcu 3(%edi), %bnd0
+# endif
movl %edx, (%edi)
# ifdef USE_AS_STPCPY
lea 3(%edi), %eax
.p2align 4
L(Exit5):
movl (%esi), %ecx
+# ifdef __CHKP__
+ bndcu 4(%edi), %bnd0
+# endif
movb %dh, 4(%edi)
movl %ecx, (%edi)
# ifdef USE_AS_STPCPY
L(Exit6):
movl (%esi), %ecx
movw 4(%esi), %dx
+# ifdef __CHKP__
+ bndcu 5(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movw %dx, 4(%edi)
# ifdef USE_AS_STPCPY
L(Exit7):
movl (%esi), %ecx
movl 3(%esi), %edx
+# ifdef __CHKP__
+ bndcu 6(%edi), %bnd0
+# endif
movl %ecx, (%edi)
movl %edx, 3(%edi)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit8):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 7(%edi), %eax
.p2align 4
L(Exit9):
movlpd (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 8(%edi), %bnd0
+# endif
movb %dh, 8(%edi)
movlpd %xmm0, (%edi)
# ifdef USE_AS_STPCPY
L(Exit10):
movlpd (%esi), %xmm0
movw 8(%esi), %dx
+# ifdef __CHKP__
+ bndcu 9(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movw %dx, 8(%edi)
# ifdef USE_AS_STPCPY
L(Exit11):
movlpd (%esi), %xmm0
movl 7(%esi), %edx
+# ifdef __CHKP__
+ bndcu 10(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 7(%edi)
# ifdef USE_AS_STPCPY
L(Exit12):
movlpd (%esi), %xmm0
movl 8(%esi), %edx
+# ifdef __CHKP__
+ bndcu 11(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movl %edx, 8(%edi)
# ifdef USE_AS_STPCPY
L(Exit13):
movlpd (%esi), %xmm0
movlpd 5(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 12(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 5(%edi)
# ifdef USE_AS_STPCPY
L(Exit14):
movlpd (%esi), %xmm0
movlpd 6(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 13(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 6(%edi)
# ifdef USE_AS_STPCPY
L(Exit15):
movlpd (%esi), %xmm0
movlpd 7(%esi), %xmm1
+# ifdef __CHKP__
+ bndcu 14(%edi), %bnd0
+# endif
movlpd %xmm0, (%edi)
movlpd %xmm1, 7(%edi)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit16):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
# ifdef USE_AS_STPCPY
lea 15(%edi), %eax
.p2align 4
L(Exit17):
movdqu (%esi), %xmm0
+# ifdef __CHKP__
+ bndcu 16(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movb %dh, 16(%edi)
# ifdef USE_AS_STPCPY
L(Exit18):
movdqu (%esi), %xmm0
movw 16(%esi), %cx
+# ifdef __CHKP__
+ bndcu 17(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movw %cx, 16(%edi)
# ifdef USE_AS_STPCPY
L(Exit19):
movdqu (%esi), %xmm0
movl 15(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 18(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 15(%edi)
# ifdef USE_AS_STPCPY
L(Exit20):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 19(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
# ifdef USE_AS_STPCPY
L(Exit21):
movdqu (%esi), %xmm0
movl 16(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 20(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movl %ecx, 16(%edi)
movb %dh, 20(%edi)
L(Exit22):
movdqu (%esi), %xmm0
movlpd 14(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 21(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 14(%edi)
# ifdef USE_AS_STPCPY
L(Exit23):
movdqu (%esi), %xmm0
movlpd 15(%esi), %xmm3
+# ifdef __CHKP__
+ bndcu 22(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm3, 15(%edi)
# ifdef USE_AS_STPCPY
L(Exit24):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 23(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
L(Exit25):
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 24(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movb %dh, 24(%edi)
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movw 24(%esi), %cx
+# ifdef __CHKP__
+ bndcu 25(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movw %cx, 24(%edi)
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 23(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 26(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 23(%edi)
movdqu (%esi), %xmm0
movlpd 16(%esi), %xmm2
movl 24(%esi), %ecx
+# ifdef __CHKP__
+ bndcu 27(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movlpd %xmm2, 16(%edi)
movl %ecx, 24(%edi)
L(Exit29):
movdqu (%esi), %xmm0
movdqu 13(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 28(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 13(%edi)
# ifdef USE_AS_STPCPY
L(Exit30):
movdqu (%esi), %xmm0
movdqu 14(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 29(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 14(%edi)
# ifdef USE_AS_STPCPY
L(Exit31):
movdqu (%esi), %xmm0
movdqu 15(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 30(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 15(%edi)
# ifdef USE_AS_STPCPY
L(Exit32):
movdqu (%esi), %xmm0
movdqu 16(%esi), %xmm2
+# ifdef __CHKP__
+ bndcu 31(%edi), %bnd0
+# endif
movdqu %xmm0, (%edi)
movdqu %xmm2, 16(%edi)
# ifdef USE_AS_STPCPY
ENTRANCE
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+# endif
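+/* The prologue scans the source one byte at a time, so each cmpb
+   below is preceded by a bndcu at exactly the offset about to be
+   read. */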
cmpb $0, (%ecx)
jz L(ExitTail1)
+# ifdef __CHKP__
+ bndcu 1(%ecx), %bnd1
+# endif
cmpb $0, 1(%ecx)
jz L(ExitTail2)
+# ifdef __CHKP__
+ bndcu 2(%ecx), %bnd1
+# endif
cmpb $0, 2(%ecx)
jz L(ExitTail3)
+# ifdef __CHKP__
+ bndcu 3(%ecx), %bnd1
+# endif
cmpb $0, 3(%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmpb $0, 4(%ecx)
jz L(ExitTail5)
+# ifdef __CHKP__
+ bndcu 5(%ecx), %bnd1
+# endif
cmpb $0, 5(%ecx)
jz L(ExitTail6)
+# ifdef __CHKP__
+ bndcu 6(%ecx), %bnd1
+# endif
cmpb $0, 6(%ecx)
jz L(ExitTail7)
+# ifdef __CHKP__
+ bndcu 7(%ecx), %bnd1
+# endif
cmpb $0, 7(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmpb $0, 8(%ecx)
jz L(ExitTail9)
+# ifdef __CHKP__
+ bndcu 9(%ecx), %bnd1
+# endif
cmpb $0, 9(%ecx)
jz L(ExitTail10)
+# ifdef __CHKP__
+ bndcu 10(%ecx), %bnd1
+# endif
cmpb $0, 10(%ecx)
jz L(ExitTail11)
+# ifdef __CHKP__
+ bndcu 11(%ecx), %bnd1
+# endif
cmpb $0, 11(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmpb $0, 12(%ecx)
jz L(ExitTail13)
+# ifdef __CHKP__
+ bndcu 13(%ecx), %bnd1
+# endif
cmpb $0, 13(%ecx)
jz L(ExitTail14)
+# ifdef __CHKP__
+ bndcu 14(%ecx), %bnd1
+# endif
cmpb $0, 14(%ecx)
jz L(ExitTail15)
+# ifdef __CHKP__
+ bndcu 15(%ecx), %bnd1
+# endif
cmpb $0, 15(%ecx)
jz L(ExitTail16)
and $-16, %ebx
pxor %xmm0, %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pcmpeqb (%ebx), %xmm0
pmovmskb %xmm0, %eax
xor %ebx, %ebx
.p2align 4
+# ifdef __CHKP__
+ bndcu 16(%ecx), %bnd1
+ bndcu 15(%edx), %bnd0
+# endif
movdqa (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movdqu %xmm1, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm4
movdqu %xmm3, (%edx, %ebx)
pcmpeqb %xmm4, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm1
movdqu %xmm4, (%edx, %ebx)
pcmpeqb %xmm1, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm2
movdqu %xmm1, (%edx, %ebx)
pcmpeqb %xmm2, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 16(%ecx, %ebx), %bnd1
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movaps 16(%ecx, %ebx), %xmm3
movdqu %xmm2, (%edx, %ebx)
pcmpeqb %xmm3, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %ebx), %bnd0
+# endif
movdqu %xmm3, (%edx, %ebx)
mov %ecx, %eax
lea 16(%ecx, %ebx), %ecx
sub %eax, %edx
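+/* Main aligned loop: four 16-byte blocks per iteration, each block
+   tested for a NUL byte before it is stored. */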
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps %xmm2, %xmm4
movaps 16(%ecx), %xmm5
test %eax, %eax
jnz L(Aligned64Leave)
L(Aligned64Loop_start):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+ bndcu -1(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
movaps (%ecx), %xmm2
movdqa %xmm2, %xmm4
pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movdqu %xmm4, -64(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movdqu %xmm5, -48(%edx)
test %eax, %eax
lea 16(%ebx), %ebx
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movdqu %xmm6, -32(%edx)
pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %eax
test $0x40, %al
jnz L(Exit7)
/* Exit 8 */
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
test $0x40, %ah
jnz L(Exit15)
/* Exit 16 */
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
.p2align 4
L(Exit1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+ bndcu (%ecx), %bnd1
+# endif
movb (%ecx), %al
movb %al, (%edx)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit2):
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+ bndcu 1(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit3):
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+ bndcu 2(%ecx), %bnd1
+# endif
movw (%ecx), %ax
movw %ax, (%edx)
movb 2(%ecx), %al
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
.p2align 4
L(Exit5):
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+ bndcu 4(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movb 4(%ecx), %al
.p2align 4
L(Exit6):
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+ bndcu 5(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movw 4(%ecx), %ax
.p2align 4
L(Exit7):
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+ bndcu 6(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 3(%ecx), %eax
.p2align 4
L(Exit9):
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+ bndcu 8(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
.p2align 4
L(Exit10):
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+ bndcu 9(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
.p2align 4
L(Exit11):
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+ bndcu 10(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl 4(%ecx), %eax
.p2align 4
L(Exit13):
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+ bndcu 12(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
.p2align 4
L(Exit14):
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+ bndcu 13(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
.p2align 4
L(Exit15):
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+ bndcu 14(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
.p2align 4
L(ExitTail1):
+# ifdef __CHKP__
+ bndcu (%edx), %bnd0
+# endif
movb (%ecx), %al
movb %al, (%edx)
movl %edx, %eax
.p2align 4
L(ExitTail2):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 1(%edx), %bnd0
+# endif
movw %ax, (%edx)
# ifdef USE_AS_STPCPY
lea 1(%edx), %eax
.p2align 4
L(ExitTail3):
movw (%ecx), %ax
+# ifdef __CHKP__
+ bndcu 2(%edx), %bnd0
+# endif
movw %ax, (%edx)
movb 2(%ecx), %al
movb %al, 2(%edx)
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
# ifdef USE_AS_STPCPY
lea 3(%edx), %eax
.p2align 4
L(ExitTail5):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
movl %eax, (%edx)
movb 4(%ecx), %al
movb %al, 4(%edx)
.p2align 4
L(ExitTail6):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 5(%edx), %bnd0
+# endif
movl %eax, (%edx)
movw 4(%ecx), %ax
movw %ax, 4(%edx)
.p2align 4
L(ExitTail7):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 6(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 3(%ecx), %eax
movl %eax, 3(%edx)
.p2align 4
L(ExitTail8):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
.p2align 4
L(ExitTail9):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
.p2align 4
L(ExitTail10):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 9(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
.p2align 4
L(ExitTail11):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 10(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
.p2align 4
L(ExitTail12):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl 4(%ecx), %eax
movl %eax, 4(%edx)
.p2align 4
L(ExitTail13):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 5(%ecx), %xmm0
movlpd %xmm0, 5(%edx)
.p2align 4
L(ExitTail14):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 13(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 6(%ecx), %xmm0
movlpd %xmm0, 6(%edx)
.p2align 4
L(ExitTail15):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 14(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 7(%ecx), %xmm0
movlpd %xmm0, 7(%edx)
.p2align 4
L(ExitTail16):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movlpd 8(%ecx), %xmm0
movlpd %xmm0, 8(%edx)
ENTRY ( __strlen_sse2_bsf)
ENTRANCE
mov STR(%esp), %edi
+#ifdef __CHKP__
+ bndldx STR(%esp,%edi,1), %bnd0
+ bndcl (%edi),%bnd0
+ bndcu (%edi),%bnd0
+#endif
xor %eax, %eax
mov %edi, %ecx
and $0x3f, %ecx
pxor %xmm3, %xmm3
.p2align 4
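+/* The loop examines 64 bytes per iteration; each pcmpeqb probe is
+   preceded by a bndcu at the offset it is about to read. */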
L(align16_loop):
+#ifdef __CHKP__
+ bndcu 16(%eax), %bnd0
+#endif
pcmpeqb 16(%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
+#ifdef __CHKP__
+ bndcu 32(%eax), %bnd0
+#endif
pcmpeqb 32(%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
+#ifdef __CHKP__
+ bndcu 48(%eax), %bnd0
+#endif
pcmpeqb 48(%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
+#ifdef __CHKP__
+ bndcu 64(%eax), %bnd0
+#endif
pcmpeqb 64(%eax), %xmm3
pmovmskb %xmm3, %edx
lea 64(%eax), %eax
L(exit_less16):
bsf %edx, %edx
add %edx, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit16):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $16, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit32):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $32, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
RETURN
L(exit48):
sub %edi, %eax
bsf %edx, %edx
add %edx, %eax
add $48, %eax
+#ifdef __CHKP__
+ bndcu -1(%edi, %eax), %bnd0
+#endif
POP (%edi)
POP (%esi)
ret
# define PUSH(REG) pushl REG; CFI_PUSH (REG)
# define POP(REG) popl REG; CFI_POP (REG)
# undef RETURN
-# define RETURN POP (%edi); CFI_PUSH(%edi); ret
+# define RETURN \
+ mov STR+4(%esp),%edx; \
+ bndcu -1(%edx,%eax), %bnd0; \
+ POP (%edi); CFI_PUSH(%edi); ret
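+/* The redefined RETURN re-checks that the byte at STR + result - 1
+   still lies below the string's upper bound before returning; STR+4
+   compensates for the %edi still on the stack. */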
# endif
# ifndef STRLEN
atom_text_section
ENTRY (STRLEN)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+# endif
# ifdef USE_AS_STRNLEN
PUSH (%edi)
movl LEN(%esp), %edi
sub $4, %edi
jbe L(len_less4_prolog)
# endif
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
# endif
xor %eax, %eax
cmpb $0, (%edx)
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
lea 16(%eax), %eax
jnz L(exit)
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
pcmpeqb (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
sub $64, %edi
jbe L(len_less64)
# endif
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
add $4, %edi
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmpb $0, (%edx)
jz L(exit_tail0)
cmp $1, %edi
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx), %bnd0
+ bndcu (%ecx), %bnd0
+# endif
+
PUSH (%edi)
pxor %xmm2, %xmm2
mov %ecx, %edi
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
jz L(return_null)
bsr %eax, %eax
add %edi, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
CFI_PUSH (%edi)
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi), %bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqb %xmm0, %xmm2
add $16, %edi
POP (%esi)
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
bsr %eax, %eax
add %edi, %eax
sub $16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
POP (%edi)
ret
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
-
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %eax
punpckldq %xmm1, %xmm1
pxor %xmm2, %xmm2
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
CFI_PUSH (%edi)
.p2align 4
L(loop):
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
jnz L(matches)
add $16, %ecx
+# ifdef __CHKP__
+ bndcu (%ecx),%bnd0
+# endif
movdqa (%ecx), %xmm0
pcmpeqd %xmm0, %xmm2
pcmpeqd %xmm1, %xmm0
test $15, %dl
jnz L(return_null)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_4):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
test $15, %dh
jnz L(return_null)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(match_case2_12):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
test $0x01, %al
jnz L(exit0)
lea 4(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
test $0x01, %ah
jnz L(exit3)
lea 12(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit0):
mov %ecx, %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
L(exit3):
lea 8(%ecx), %eax
+# ifdef __CHKP__
+ bndcu (%eax),%bnd0
+# endif
ret
.p2align 4
*/
mov STR1(%esp), %edx
mov STR2(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
mov (%eax), %ecx
cmp %ecx, (%edx)
ENTRY (__wcscpy_ssse3)
mov STR1(%esp), %edx
mov STR2(%esp), %ecx
+# ifdef __CHKP__
+ bndldx STR1(%esp,%edx,1), %bnd0
+ bndldx STR2(%esp,%ecx,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%ecx), %bnd1
+ bndcu (%ecx), %bnd1
+# endif
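+/* Wide characters are 4 bytes, so the NUL scan and its bndcu checks
+   advance in 4-byte steps. */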
cmp $0, (%ecx)
jz L(ExitTail4)
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
cmp $0, 4(%ecx)
jz L(ExitTail8)
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
cmp $0, 8(%ecx)
jz L(ExitTail12)
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
cmp $0, 12(%ecx)
jz L(ExitTail16)
pxor %xmm0, %xmm0
pcmpeqd (%esi), %xmm0
movdqu (%ecx), %xmm1
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm1, (%edx)
pmovmskb %xmm0, %eax
jmp L(Shl12)
L(Align16Both):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 16(%ecx), %bnd1
+# endif
movaps (%ecx), %xmm1
movaps 16(%ecx), %xmm2
movaps %xmm1, (%edx)
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm4
movaps %xmm3, (%edx, %esi)
pcmpeqd %xmm4, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm1
movaps %xmm4, (%edx, %esi)
pcmpeqd %xmm1, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm2
movaps %xmm1, (%edx, %esi)
pcmpeqd %xmm2, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+ bndcu 16(%ecx, %esi), %bnd1
+# endif
movaps 16(%ecx, %esi), %xmm3
movaps %xmm2, (%edx, %esi)
pcmpeqd %xmm3, %xmm0
test %eax, %eax
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu 15(%edx, %esi), %bnd0
+# endif
movaps %xmm3, (%edx, %esi)
mov %ecx, %eax
lea 16(%ecx, %esi), %ecx
mov $-0x40, %esi
L(Aligned64Loop):
+# ifdef __CHKP__
+ bndcu (%ecx), %bnd1
+# endif
movaps (%ecx), %xmm2
movaps 32(%ecx), %xmm3
movaps %xmm2, %xmm4
test %eax, %eax
jnz L(Aligned64Leave)
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
movaps %xmm5, -48(%edx)
movaps %xmm6, -32(%edx)
pcmpeqd %xmm5, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -49(%edx), %bnd0
+# endif
movaps %xmm4, -64(%edx)
test %eax, %eax
lea 16(%esi), %esi
pcmpeqd %xmm6, %xmm0
pmovmskb %xmm0, %eax
+# ifdef __CHKP__
+ bndcu -33(%edx), %bnd0
+# endif
movaps %xmm5, -48(%edx)
test %eax, %eax
lea 16(%esi), %esi
jnz L(CopyFrom1To16Bytes)
+# ifdef __CHKP__
+ bndcu -17(%edx), %bnd0
+# endif
movaps %xmm6, -32(%edx)
pcmpeqd %xmm7, %xmm0
pmovmskb %xmm0, %eax
jnz L(CopyFrom1To16Bytes)
mov $-0x40, %esi
+# ifdef __CHKP__
+ bndcu -1(%edx), %bnd0
+# endif
movaps %xmm7, -16(%edx)
jmp L(Aligned64Loop)
.p2align 4
L(Shl4):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps -4(%ecx), %xmm1
movaps 12(%ecx), %xmm2
L(Shl4Start):
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
jnz L(Shl4LoopExit)
palignr $4, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 28(%ecx), %xmm2
jnz L(Shl4LoopExit)
palignr $4, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 28(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
lea 28(%ecx), %ecx
lea 16(%edx), %edx
movaps -4(%ecx), %xmm1
L(Shl4LoopStart):
+# ifdef __CHKP__
+ bndcu 12(%ecx), %bnd1
+# endif
movaps 12(%ecx), %xmm2
movaps 28(%ecx), %xmm3
movaps %xmm3, %xmm6
lea 64(%ecx), %ecx
palignr $4, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
L(Shl4LoopExit):
movlpd (%ecx), %xmm0
movl 8(%ecx), %esi
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %esi, 8(%edx)
POP (%esi)
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
.p2align 4
L(Shl8):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps -8(%ecx), %xmm1
movaps 8(%ecx), %xmm2
L(Shl8Start):
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
jnz L(Shl8LoopExit)
palignr $8, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 24(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 24(%ecx), %xmm2
jnz L(Shl8LoopExit)
palignr $8, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 24(%ecx), %ecx
lea 16(%edx), %edx
movaps -8(%ecx), %xmm1
L(Shl8LoopStart):
+# ifdef __CHKP__
+ bndcu 8(%ecx), %bnd1
+# endif
movaps 8(%ecx), %xmm2
movaps 24(%ecx), %xmm3
movaps %xmm3, %xmm6
lea 64(%ecx), %ecx
palignr $8, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
L(Shl8LoopExit):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
POP (%esi)
add $8, %edx
test $0x01, %al
jnz L(Exit4)
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edi, %eax
RETURN
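
The Shl4/Shl8/Shl12 paths above keep both loads and stores 16-byte aligned by reading two adjacent aligned chunks of the source and stitching them together with palignr. A sketch of one Shl8-style step using SSSE3 intrinsics (the function name is mine, and the NUL detection of the real code is omitted):

#include <tmmintrin.h>   /* SSSE3: _mm_alignr_epi8 */

/* Copy 16 bytes to an aligned dst from a src that sits 8 bytes past a
   16-byte boundary, using only aligned loads. */
static void copy16_shl8(char *dst, const char *src)
{
    const __m128i *s = (const __m128i *) (src - 8);   /* 16-byte aligned */
    __m128i lo = _mm_load_si128(s);        /* bytes src-8 .. src+7  */
    __m128i hi = _mm_load_si128(s + 1);    /* bytes src+8 .. src+23 */
    /* (hi:lo) >> 8 bytes yields the 16 bytes starting at src, which is
       what "palignr $8, %xmm1, %xmm2" computes above. */
    _mm_store_si128((__m128i *) dst, _mm_alignr_epi8(hi, lo, 8));
}
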
.p2align 4
L(Shl12):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps -12(%ecx), %xmm1
movaps 4(%ecx), %xmm2
L(Shl12Start):
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
jnz L(Shl12LoopExit)
palignr $12, %xmm1, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 20(%ecx), %bnd1
+# endif
movaps %xmm2, (%edx)
movaps 20(%ecx), %xmm2
jnz L(Shl12LoopExit)
palignr $12, %xmm3, %xmm2
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movaps %xmm2, (%edx)
lea 20(%ecx), %ecx
lea 16(%edx), %edx
movaps -12(%ecx), %xmm1
L(Shl12LoopStart):
+# ifdef __CHKP__
+ bndcu 4(%ecx), %bnd1
+# endif
movaps 4(%ecx), %xmm2
movaps 20(%ecx), %xmm3
movaps %xmm3, %xmm6
lea 64(%ecx), %ecx
palignr $12, %xmm1, %xmm2
movaps %xmm7, %xmm1
+# ifdef __CHKP__
+ bndcu 63(%edx), %bnd0
+# endif
movaps %xmm5, 48(%edx)
movaps %xmm4, 32(%edx)
movaps %xmm3, 16(%edx)
L(Shl12LoopExit):
movl (%ecx), %esi
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %esi, (%edx)
mov $4, %esi
test $0x01, %al
jnz L(Exit4)
L(Exit8):
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+ bndcu 7(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl %edi, %eax
test $0x01, %ah
jnz L(Exit12)
L(Exit16):
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+ bndcu 15(%ecx), %bnd1
+# endif
movdqu (%ecx), %xmm0
movdqu %xmm0, (%edx)
movl %edi, %eax
.p2align 4
L(Exit4):
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+ bndcu 3(%ecx), %bnd1
+# endif
movl (%ecx), %eax
movl %eax, (%edx)
movl %edi, %eax
.p2align 4
L(Exit12):
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+ bndcu 11(%ecx), %bnd1
+# endif
movlpd (%ecx), %xmm0
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
.p2align 4
L(ExitTail4):
movl (%ecx), %eax
+# ifdef __CHKP__
+ bndcu 3(%edx), %bnd0
+# endif
movl %eax, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail8):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 7(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl %edx, %eax
ret
.p2align 4
L(ExitTail12):
movlpd (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 11(%edx), %bnd0
+# endif
movlpd %xmm0, (%edx)
movl 8(%ecx), %eax
movl %eax, 8(%edx)
.p2align 4
L(ExitTail16):
movdqu (%ecx), %xmm0
+# ifdef __CHKP__
+ bndcu 15(%edx), %bnd0
+# endif
movdqu %xmm0, (%edx)
movl %edx, %eax
ret
.text
ENTRY (__wcslen_sse2)
mov STR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndcl (%edx),%bnd0
+ bndcu (%edx),%bnd0
+# endif
cmp $0, (%edx)
jz L(exit_tail0)
+# ifdef __CHKP__
+ bndcu 4(%edx), %bnd0
+# endif
cmp $0, 4(%edx)
jz L(exit_tail1)
+# ifdef __CHKP__
+ bndcu 8(%edx), %bnd0
+# endif
cmp $0, 8(%edx)
jz L(exit_tail2)
+# ifdef __CHKP__
+ bndcu 12(%edx), %bnd0
+# endif
cmp $0, 12(%edx)
jz L(exit_tail3)
+# ifdef __CHKP__
+ bndcu 16(%edx), %bnd0
+# endif
cmp $0, 16(%edx)
jz L(exit_tail4)
+# ifdef __CHKP__
+ bndcu 20(%edx), %bnd0
+# endif
cmp $0, 20(%edx)
jz L(exit_tail5)
+# ifdef __CHKP__
+ bndcu 24(%edx), %bnd0
+# endif
cmp $0, 24(%edx)
jz L(exit_tail6)
+# ifdef __CHKP__
+ bndcu 28(%edx), %bnd0
+# endif
cmp $0, 28(%edx)
jz L(exit_tail7)
lea 16(%edx), %ecx
and $-16, %eax
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm0
pmovmskb %xmm0, %edx
pxor %xmm1, %xmm1
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm1
pmovmskb %xmm1, %edx
pxor %xmm2, %xmm2
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm2
pmovmskb %xmm2, %edx
pxor %xmm3, %xmm3
lea 16(%eax), %eax
jnz L(exit)
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
pcmpeqd (%eax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
.p2align 4
L(aligned_64_loop):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movaps (%eax), %xmm0
movaps 16(%eax), %xmm1
movaps 32(%eax), %xmm2
mov %dl, %cl
and $15, %cl
jz L(exit_1)
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
and $15, %ch
jz L(exit_3)
add $2, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_1):
add $1, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
.p2align 4
L(exit_3):
add $3, %eax
+# ifdef __CHKP__
+ mov STR(%esp), %edx
+ bndcu -1(%edx, %eax, 4), %bnd0
+# endif
ret
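
__wcslen_sse2 scans four wide characters at a time: pcmpeqd marks zero dwords and pmovmskb condenses the comparison into a bitmask. The same detection step with SSE2 intrinsics (a sketch assuming 4-byte wchar_t as on Linux/x86; the alignment setup and exit bookkeeping of the assembly are left out, and the name is mine):

#include <emmintrin.h>   /* SSE2 */
#include <stddef.h>

/* Length, in wide characters, of a 16-byte-aligned wide string. */
static size_t wcslen_sketch(const unsigned int *s)
{
    const __m128i zero = _mm_setzero_si128();
    size_t i = 0;
    for (;;) {
        __m128i w = _mm_load_si128((const __m128i *) (s + i));
        /* A zero wchar_t shows up as four set bits in the mask. */
        int mask = _mm_movemask_epi8(_mm_cmpeq_epi32(w, zero));
        if (mask)
            return i + __builtin_ctz(mask) / 4;  /* first zero dword */
        i += 4;                  /* four 4-byte characters per round */
    }
}
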
.p2align 4
# define STR1 PARMS
# define STR2 STR1+4
+# ifdef __CHKP__
+# undef RETURN
+# define RETURN bndcu (%eax),%bnd0; \
+ POP (%edi); ret; CFI_PUSH (%edi);
+# endif
+
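Redefining RETURN instruments every exit path of the function at once: each return first runs bndcu on the result pointer in %eax, so a computed pointer that escaped the object's upper bound traps before it is handed back to the caller. The same funnelling trick in C-preprocessor form (chk_upper and obj_bounds are placeholders standing in for the bndcu on %eax, in the spirit of the model earlier in this patch):

/* Route every return through one macro so each exit site is checked. */
#define RETURN_PTR(p) \
    do { chk_upper(obj_bounds, (p)); return (p); } while (0)
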
atom_text_section
ENTRY (__wcsrchr_sse2)
ENTRANCE
mov STR1(%esp), %ecx
movd STR2(%esp), %xmm1
+# ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndcl (%ecx),%bnd0
+ bndcu (%ecx),%bnd0
+# endif
mov %ecx, %edi
punpckldq %xmm1, %xmm1
/* Loop start on aligned string. */
.p2align 4
L(loop):
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm0
pcmpeqd %xmm0, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm3
pcmpeqd %xmm3, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm4
pcmpeqd %xmm4, %xmm2
add $16, %edi
or %eax, %ecx
jnz L(matches)
+# ifdef __CHKP__
+ bndcu (%edi),%bnd0
+# endif
movdqa (%edi), %xmm5
pcmpeqd %xmm5, %xmm2
add $16, %edi
movl STR1(%esp), %ecx
movl STR2(%esp), %edx
-
-L(oop): movb (%ecx), %al
+#ifdef __CHKP__
+ bndldx STR1(%esp,%ecx,1), %bnd0
+ bndldx STR2(%esp,%edx,1), %bnd1
+ bndcl (%ecx), %bnd0
+ bndcl (%edx), %bnd1
+#endif
+
+L(oop):
+#ifdef __CHKP__
+ bndcu (%ecx), %bnd0
+ bndcu (%edx), %bnd1
+#endif
+ movb (%ecx), %al
cmpb (%edx), %al
jne L(neq)
incl %ecx
testl %edx, %edx
jz L(returnNULL)
movl DELIM(%esp), %eax /* Get start of delimiter set. */
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1),%bnd0
+ bndldx DELIM(%esp,%eax,1),%bnd1
+ bndcl (%edx), %bnd0
+ bndcu (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%eax), %bnd1
+#endif
/* To understand the following code remember that %ecx == 0 now.
Although all the following instructions only modify %cl we always
movl LEN(%esp), %esi /* len: length of memory block. */
cfi_rel_offset (esi, 4)
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* If we must not test more than three characters test
them one by one. This is especially true for 0. */
cmpl $4, %esi
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(2) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
cmpl $4-16, %esi /* rest < 4 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
cmpl $8-16, %esi /* rest < 8 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
cmpl $12-16, %esi /* rest < 12 bytes? */
jb L(3) /* yes, then test byte by byte */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
L(3): andl $3, %esi /* mask out uninteresting bytes */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
incl %eax /* increment source pointer */
decl %esi /* decrement length */
jz L(4) /* no remaining bytes => return NULL */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte with CHR */
je L(9) /* equal, then return pointer */
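
The word loops above detect a zero byte (or, after XOR with the c|c|c|c word, a byte equal to C) four bytes at a time via the 0xfefefeff magic constant: the addition carries out of every byte that is non-zero, and a missing carry pinpoints the match, as the comments in this file explain. A C sketch of the idea, using the better-known equivalent zero-in-word test; the broadcast step is what the "C in all bytes of the dword" comments below refer to, the helper names are mine, and the scan assumes the byte is actually present:

#include <stdint.h>
#include <string.h>

/* Broadcast c to all four bytes: the c|c|c|c word the assembly builds. */
static uint32_t broadcast(unsigned char c)
{
    uint32_t w = c;
    w |= w << 8;
    w |= w << 16;
    return w;
}

/* Non-zero iff some byte of w is zero; equivalent in effect to the
   0xfefefeff carry trick used by the assembly. */
static uint32_t has_zero_byte(uint32_t w)
{
    return (w - 0x01010101u) & ~w & 0x80808080u;
}

/* Word-at-a-time scan for byte c; p must be 4-byte aligned and c must
   occur in the buffer. */
static const char *find_byte(const char *p, unsigned char c)
{
    uint32_t cccc = broadcast(c);
    for (;;) {
        uint32_t w;
        memcpy(&w, p, sizeof w);          /* one word per iteration */
        if (has_zero_byte(w ^ cccc))      /* some byte equals c */
            break;
        p += 4;
    }
    while (*(const unsigned char *) p != c)
        p++;                              /* locate it within the word */
    return p;
}
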
cfi_rel_offset (esi, 0)
movl BLK2(%esp), %edi
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx BLK1(%esp,%esi,1), %bnd0
+ bndldx BLK2(%esp,%edi,1), %bnd1
+ bndcl (%esi), %bnd0
+ bndcl (%edi), %bnd1
+#endif
cld /* Set direction of comparison. */
Note that the following operation does not change 0xffffffff. */
orb $1, %al /* Change 0 to 1. */
-L(1): popl %esi /* Restore registers. */
+L(1):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+ bndcu (%edi), %bnd1
+#endif
+ popl %esi /* Restore registers. */
+
cfi_adjust_cfa_offset (-4)
cfi_restore (esi)
movl %edx, %edi
movl STR(%esp), %eax
movl CHR(%esp), %edx
+#ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+#endif
+
/* At the moment %edx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
operations on 16 bit words because these require an
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
testb $3, %al /* correctly aligned ? */
je L(1) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
cmpb %dl, (%eax) /* compare byte */
je L(9) /* target found => return */
incl %eax /* increment source pointer */
/* Each round the main loop processes 16 bytes. */
ALIGN (4)
-L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(1):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
(following LL(13) below). Even the len can be compared with
constants instead of decrementing each time. */
+#ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+#endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
the addition will not result in 0. */
jnz L(7) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+#endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
the addition will not result in 0. */
jnz L(6) /* found it => return pointer */
+#ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+#endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
/* No further test needed since we know it is one of the four bytes. */
L(9):
+#ifdef __CHKP__
+ bndcu (%eax), %bnd0
+#endif
popl %edi /* pop saved register */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
movl SRC(%esp), %esi
cfi_rel_offset (esi, 0)
movl LEN(%esp), %ecx
+#ifdef __CHKP__
+ bndldx DEST(%esp,%eax,1), %bnd0
+ bndldx SRC(%esp,%esi,1), %bnd1
+ bndcl (%eax), %bnd0
+ bndcu -1(%eax, %ecx), %bnd0
+ bndcl (%esi), %bnd1
+ bndcu (%esi), %bnd1
+#endif
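
This length-parameterised entry differs from the string routines above: because the extent is known up front, bndcl (%eax) plus bndcu -1(%eax, %ecx) validates the whole destination range before any copying starts, instead of checking access by access. In the C model from the first sketch in this patch:

#include <stddef.h>

/* Validate all of [p, p + len) with two checks, as the bndcl/bndcu pair
   above does for the destination; assumes len != 0 (model code reusing
   struct bounds, chk_lower and chk_upper from the earlier sketch). */
static void chk_range(struct bounds b, const char *p, size_t len)
{
    chk_lower(b, p);            /* first byte in bounds */
    chk_upper(b, p + len - 1);  /* last byte in bounds */
}
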
subl %eax, %esi /* magic: reduce number of loop variants
to one using addressing mode */
movl STR(%esp), %eax
movl CHR(%esp), %edx
+# ifdef __CHKP__
+ bndldx STR(%esp,%eax,1), %bnd0
+ bndcl (%eax), %bnd0
+ bndcu (%eax), %bnd0
+# endif
/* At the moment %edx contains CHR. What we need for the
algorithm is CHR in all bytes of the dword. Avoid
operations on 16 bit words because these require an
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
testb $3, %al /* correctly aligned ? */
jz L(11) /* yes => begin loop */
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
movb (%eax), %cl /* load byte in question (we need it twice) */
cmpb %cl, %dl /* compare byte */
je L(6) /* target found => return */
L(1): addl $16, %eax /* adjust pointer for whole round */
-L(11): movl (%eax), %ecx /* get word (= 4 bytes) in question */
+L(11):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ movl (%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
movl $0xfefefeff, %edi /* magic value */
the addition will not result in 0. */
jnz L(7) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 4(%eax), %bnd0
+# endif
movl 4(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
the addition will not result in 0. */
jnz L(71) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 8(%eax), %bnd0
+# endif
movl 8(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
the addition will not result in 0. */
jnz L(72) /* found NUL => return NULL */
+# ifdef __CHKP__
+ bndcu 12(%eax), %bnd0
+# endif
movl 12(%eax), %ecx /* get word (= 4 bytes) in question */
xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c
are now 0 */
/* It must be in the fourth byte and it cannot be NUL. */
incl %eax
-L(6): popl %edi /* restore saved register content */
+L(6):
+# ifdef __CHKP__
+ bndcu (%eax), %bnd0
+# endif
+ popl %edi /* restore saved register content */
cfi_adjust_cfa_offset (-4)
cfi_restore (edi)
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
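
As the comment above says, these routines first expand the stop set into a per-character flag table so the scan loop pays one indexed load per input byte. A compact C rendering of the technique (a sketch of the idea, not the exact stack layout the assembly builds):

#include <stddef.h>

/* Length of the initial segment of s containing no byte from stop;
   the flag-table approach used by strcspn/strpbrk-style code. */
static size_t strcspn_sketch(const char *s, const char *stop)
{
    unsigned char flag[256] = { 0 };
    flag[0] = 1;                          /* NUL always stops the scan */
    for (; *stop; stop++)
        flag[(unsigned char) *stop] = 1;  /* mark each stop character */

    size_t n = 0;
    while (!flag[(unsigned char) s[n]])   /* one table load per byte */
        n++;
    return n;
}
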
movl STR(%esp), %edx
movl STOP(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx STOP(%esp,%eax,1), %bnd1
+ bndcl (%edx), %bnd0
+ bndcl (%eax), %bnd1
+ bndcu (%edx), %bnd0
+ bndcu (%eax), %bnd1
+#endif
/* First we create a table with flags for all possible characters.
For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
movl STR(%esp), %esi
cfi_rel_offset (esi, 0)
movl CHR(%esp), %ecx
+#ifdef __CHKP__
+ bndldx STR(%esp,%esi,1), %bnd0
+ bndcl (%esi), %bnd0
+#endif
/* At the moment %ecx contains C. What we need for the
algorithm is C in all bytes of the dword. Avoid
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(11) /* target found => return */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(12) /* target found => return */
testl $3, %esi /* correctly aligned ? */
jz L(19) /* yes => begin loop */
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
movb (%esi), %dl /* load byte in question (we need it twice) */
cmpb %dl, %cl /* compare byte */
jne L(13) /* target found => return */
L(1): addl $16, %esi /* increment pointer for full round */
-L(19): movl (%esi), %edx /* get word (= 4 bytes) in question */
+L(19):
+#ifdef __CHKP__
+ bndcu (%esi), %bnd0
+#endif
+ movl (%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
carry bits reported for each byte which
the addition will not result in 0. */
jnz L(3) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 4(%esi), %bnd0
+#endif
movl 4(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
the addition will not result in 0. */
jnz L(31) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 8(%esi), %bnd0
+#endif
movl 8(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
the addition will not result in 0. */
jnz L(32) /* C is detected in the word => examine it */
+#ifdef __CHKP__
+ bndcu 12(%esi), %bnd0
+#endif
movl 12(%esi), %edx /* get word (= 4 bytes) in question */
movl $0xfefefeff, %edi /* magic value */
addl %edx, %edi /* add the magic value to the word. We get
movl STR(%esp), %edx
movl DELIM(%esp), %eax
+#ifdef __CHKP__
+ bndldx STR(%esp,%edx,1), %bnd0
+ bndldx DELIM(%esp,%eax,1), %bnd1
+#endif
+
#if !defined USE_AS_STRTOK_R && defined PIC
pushl %ebx /* Save PIC register. */
cfi_adjust_cfa_offset (4)
/* Store the pointer to the next character. */
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
xorl %eax, %eax
#ifdef USE_AS_STRTOK_R
movl SAVE(%esp), %ecx
+# ifdef __CHKP__
+ bndmov %bnd2, %bnd0
+# endif
#endif
movl %edx, SAVE_PTR
jmp L(epilogue)
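
The bndmov here deals with strtok_r's saved state: the scan pointer stored through SAVE_PTR must keep its bounds across calls, so they are copied between bound registers with bndmov rather than recreated. A small model of keeping bounds alongside a saved pointer, reusing the illustrative struct from the first sketch (MPX effectively does this through its bounds table; this struct is not a real API):

struct tok_state {
    char *save_ptr;       /* where the next strtok_r call resumes */
    struct bounds bnd;    /* bounds to re-check before dereferencing */
};
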