1 /* Optimized memmove implementation for POWER10.
2 Copyright (C) 2021-2023 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, see
17 <https://www.gnu.org/licenses/>. */
22 /* void* [r3] memmove (void *dest [r3], const void *src [r4], size_t len [r5])
24 This optimization checks if 'src' and 'dest' overlap. If they do not,
25 or if 'src' is ahead of 'dest', then it copies forward.
26 Otherwise, an optimized backward copy is used. */
29 # define MEMMOVE memmove
32 ENTRY_TOCLESS (MEMMOVE, 5)
37 /* Check if there is overlap, if so it will branch to backward copy. */
40 blt cr7,L(memmove_bwd)
42 /* Fast path for length shorter than 16 bytes. */
49 /* For shorter lengths, aligning the dest address to 16 bytes either
50 decreases performance or is irrelevant. This comparison is used to
51 skip the alignment in that case. */
54 /* Account for the first 16-byte copy. */
56 addi r11,r3,16 /* use r11 to keep dest address on r3. */
62 /* Account for the first copy <= 16 bytes. This is necessary for memmove
63 because at this point the src address can be in front of the dest address. */
70 add r11,r3,r9 /* use r11 to keep dest address on r3. */
73 /* Align dest to 16 bytes. */
95 /* Main loop that copies 128 bytes each iteration. */
148 /* Copies the last 1-63 bytes. */
151 /* r8 holds the number of bytes that will be copied with lxv/stxv. */
180 /* If dest and src overlap, we should copy backwards. */
185 /* Optimization for length smaller than 16 bytes. */
189 /* For shorter lengths the alignment either slows the copy down or is
190 irrelevant. The forward copy reuses an already-needed comparison against
191 256 for that. Here 128 is used instead, as it reduces code and improves readability. */
193 blt cr7,L(bwd_loop_tail)
195 /* Align dest address to 16 bytes. */
212 /* Main loop that copies 128 bytes every iteration. */
249 ble cr5,L(bwd_final_64)
266 /* Copies the last 1-63 bytes. */
269 /* r8 holds the number of bytes that will be copied with lxv/stxv. */
296 /* Copy last 16 bytes. */
306 END_GEN_TB (MEMMOVE,TB_TOCLESS)
307 libc_hidden_builtin_def (memmove)