From: Adhemerval Zanella Date: Wed, 4 Jun 2025 20:42:43 +0000 (-0300) Subject: resolv: Optimize inet_ntop X-Git-Tag: glibc-2.42~90 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=80f389ed7e684582439629b264c22a84c26bfd8e;p=thirdparty%2Fglibc.git resolv: Optimize inet_ntop The benchtests/inet_ntop_ipv4 and benchtests/inet_ntop_ipv6 profiles show that most of the time is spent in costly sprintf operations: $ perf record ./benchtests/bench-inet_ntop_ipv4 && perf report --stdio [...] 38.53% bench-inet_ntop libc.so [.] __printf_buffer 18.69% bench-inet_ntop libc.so [.] __printf_buffer_write 11.01% bench-inet_ntop libc.so [.] _itoa_word 8.02% bench-inet_ntop bench-inet_ntop_ipv4 [.] bench_start 6.99% bench-inet_ntop libc.so [.] __memmove_avx_unaligned_erms 3.86% bench-inet_ntop libc.so [.] __strchrnul_avx2 2.82% bench-inet_ntop libc.so [.] __strcpy_avx2 1.90% bench-inet_ntop libc.so [.] inet_ntop4 1.78% bench-inet_ntop libc.so [.] __vsprintf_internal 1.55% bench-inet_ntop libc.so [.] __sprintf_chk 1.18% bench-inet_ntop libc.so [.] __GI___inet_ntop $ perf record ./benchtests/bench-inet_ntop_ipv6 && perf report --stdio 35.44% bench-inet_ntop libc.so [.] __printf_buffer 14.35% bench-inet_ntop libc.so [.] __printf_buffer_write 10.27% bench-inet_ntop libc.so [.] __GI___inet_ntop 7.93% bench-inet_ntop libc.so [.] _itoa_word 7.00% bench-inet_ntop libc.so [.] __sprintf_chk 6.20% bench-inet_ntop libc.so [.] __vsprintf_internal 5.26% bench-inet_ntop libc.so [.] __strchrnul_avx2 5.05% bench-inet_ntop bench-inet_ntop_ipv6 [.] bench_start 3.70% bench-inet_ntop libc.so [.] __memmove_avx_unaligned_erms 2.11% bench-inet_ntop libc.so [.] __printf_buffer_done A new implementation is used instead: * The printf usage is replaced with an expanded function that prints either an IPv4 octet or an IPv6 quartet; * The strcpy is replaced with a memcpy (since ABIs usually tend to optimize the latter); * For IPv6, the '::' shorthanding is done in-place instead of using a temporary buffer. 
* A temporary buffer is used iff the size is smaller than INET_ADDRSTRLEN/INET6_ADDRSTRLEN. * Inline is used for both inet_ntop4 and inet_ntop6. The code is significantly rewritten, so I take it this requires a new license. The performance results on aarch64 Neoverse1 with gcc 14.2.1: * master aarch64-linux-gnu-master$ ./benchtests/bench-inet_ntop_ipv4 "inet_ntop_ipv4": { "workload-ipv4-random": { "duration": 1.43067e+09, "iterations": 8e+06, "reciprocal-throughput": 178.572, "latency": 179.096, "max-throughput": 5.59997e+06, "min-throughput": 5.58359e+06 } aarch64-linux-gnu-master$ ./benchtests/bench-inet_ntop_ipv6 "inet_ntop_ipv6": { "workload-ipv6-random": { "duration": 1.68539e+09, "iterations": 4e+06, "reciprocal-throughput": 421.307, "latency": 421.388, "max-throughput": 2.37357e+06, "min-throughput": 2.37311e+06 } } * patched aarch64-linux-gnu$ ./benchtests/bench-inet_ntop_ipv4 "inet_ntop_ipv4": { "workload-ipv4-random": { "duration": 1.06133e+09, "iterations": 5.6e+07, "reciprocal-throughput": 18.8482, "latency": 19.0565, "max-throughput": 5.30555e+07, "min-throughput": 5.24755e+07 } } aarch64-linux-gnu$ ./benchtests/bench-inet_ntop_ipv6 "inet_ntop_ipv6": { "workload-ipv6-random": { "duration": 1.01246e+09, "iterations": 2.4e+07, "reciprocal-throughput": 42.5576, "latency": 41.8139, "max-throughput": 2.34976e+07, "min-throughput": 2.39155e+07 } } Checked on aarch64-linux-gnu and x86_64-linux-gnu. Reviewed-by: DJ Delorie --- diff --git a/resolv/inet_ntop.c b/resolv/inet_ntop.c index 5c414373b7..0fdef3231a 100644 --- a/resolv/inet_ntop.c +++ b/resolv/inet_ntop.c @@ -1,136 +1,155 @@ -/* - * Copyright (c) 1996-1999 by Internet Software Consortium. - * - * Permission to use, copy, modify, and distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. 
- * - * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS - * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE - * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL - * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR - * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS - * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS - * SOFTWARE. - */ - -#include -#include -#include - -#include +/* Convert IPv4/IPv6 addresses from binary to text form. + Copyright (C) 1996-2025 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, see + <https://www.gnu.org/licenses/>. */ + #include #include - #include -#include #include +#include <_itoa.h> -#ifdef SPRINTF_CHAR -# define SPRINTF(x) strlen (sprintf /**/ x) -#else -# define SPRINTF(x) ((size_t) sprintf x) -#endif - -/* - * WARNING: Don't even consider trying to compile this on a system where - * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. 
- */ - -static const char *inet_ntop4 (const u_char *src, char *dst, socklen_t size); -static const char *inet_ntop6 (const u_char *src, char *dst, socklen_t size); - -/* char * - * __inet_ntop(af, src, dst, size) - * convert a network format address to presentation format. - * return: - * pointer to presentation format address (`dst'), or NULL (see errno). - * author: - * Paul Vixie, 1996. - */ -const char * -__inet_ntop (int af, const void *src, char *dst, socklen_t size) +static inline char * +put_uint8 (uint8_t word, char *tp) { - switch (af) + int s = 1; + if (word >= 10) { - case AF_INET: - return (inet_ntop4 (src, dst, size)); - case AF_INET6: - return (inet_ntop6 (src, dst, size)); - default: - __set_errno (EAFNOSUPPORT); - return (NULL); + if (word >= 100) + { + tp[2] = '0' + word % 10; + word /= 10; + s += 1; + } + + tp[1] = '0' + word % 10; + word /= 10; + s += 1; } - /* NOTREACHED */ + *tp = '0' + word; + return tp + s; +} + +static inline char * +put_uint16 (uint16_t word, char *tp) +{ + if (word >= 0x1000) + *tp++ = _itoa_lower_digits[(word >> 12) & 0xf]; + if (word >= 0x100) + *tp++ = _itoa_lower_digits[(word >> 8) & 0xf]; + if (word >= 0x10) + *tp++ = _itoa_lower_digits[(word >> 4) & 0xf]; + *tp++ = _itoa_lower_digits[word & 0xf]; + return tp; +} + +static __always_inline char * +inet_ntop4_format (const uint8_t *src, char *dst) +{ + dst = put_uint8 (src[0], dst); + *(dst++) = '.'; + dst = put_uint8 (src[1], dst); + *(dst++) = '.'; + dst = put_uint8 (src[2], dst); + *(dst++) = '.'; + dst = put_uint8 (src[3], dst); + *dst++ = '\0'; + return dst; } -libc_hidden_def (__inet_ntop) -weak_alias (__inet_ntop, inet_ntop) -/* const char * - * inet_ntop4(src, dst, size) - * format an IPv4 address - * return: - * `dst' (as a const) - * notes: - * (1) uses no statics - * (2) takes a u_char* not an in_addr as input - * author: - * Paul Vixie, 1996. 
- */ -static const char * -inet_ntop4 (const u_char *src, char *dst, socklen_t size) +static __always_inline const char * +inet_ntop4 (const uint8_t *src, char *dst, socklen_t size) { - static const char fmt[] = "%u.%u.%u.%u"; - char tmp[sizeof "255.255.255.255"]; + if (size >= INET_ADDRSTRLEN) + { + inet_ntop4_format (src, dst); + return dst; + } - if (SPRINTF ((tmp, fmt, src[0], src[1], src[2], src[3])) >= size) + char tmp[INET_ADDRSTRLEN]; + char *tp = inet_ntop4_format (src, tmp); + socklen_t tmp_s = tp - tmp; + if (tmp_s > size) { __set_errno (ENOSPC); - return (NULL); + return NULL; } - return strcpy (dst, tmp); + return memcpy (dst, tmp, tmp_s); } -/* const char * - * inet_ntop6(src, dst, size) - * convert IPv6 binary address into presentation (printable) format - * author: - * Paul Vixie, 1996. - */ -static const char * -inet_ntop6 (const u_char *src, char *dst, socklen_t size) +struct best_t { - /* - * Note that int32_t and int16_t need only be "at least" large enough - * to contain a value of the specified size. On some systems, like - * Crays, there is no such thing as an integer variable with 16 bits. - * Keep this in mind if you think this function should have been coded - * to use pointer overlays. All the world's not a VAX. - */ - char tmp[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"], *tp; - struct - { - int base, len; - } best, cur; - u_int words[NS_IN6ADDRSZ / NS_INT16SZ]; - int i; - - /* - * Preprocess: - * Copy the input (bytewise) array into a wordwise array. - * Find the longest run of 0x00's in src[] for :: shorthanding. 
- */ - memset (words, '\0', sizeof words); - for (i = 0; i < NS_IN6ADDRSZ; i += 2) - words[i / 2] = (src[i] << 8) | src[i + 1]; - best.base = -1; - cur.base = -1; - best.len = 0; - cur.len = 0; - for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) + int base; + int len; +}; + +static inline uint16_t +in6_addr_addr16 (const struct in6_addr *src, int idx) +{ + const struct { uint16_t x; } __attribute__((__packed__)) *pptr = + (typeof(pptr))(&src->s6_addr16[idx]); + return ntohs (pptr->x); +} + +static __always_inline char * +inet_ntop6_format (const struct in6_addr *src, struct best_t best, char *dst) +{ + char *tp = dst; + for (int i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) { - if (words[i] == 0) + /* Are we inside the best run of 0x00's? */ + if (best.base != -1 && i >= best.base && i < (best.base + best.len)) + { + if (i == best.base) + *tp++ = ':'; + continue; + } + /* Are we following an initial run of 0x00s or any real hex? */ + if (i != 0) + *tp++ = ':'; + /* Is this address an encapsulated IPv4? */ + if (i == 6 && best.base == 0 + && (best.len == 6 || (best.len == 5 + && in6_addr_addr16 (src, 5) == 0xffff))) + { + if (!inet_ntop4 (src->s6_addr + 12, tp, + INET6_ADDRSTRLEN - (tp - dst))) + return NULL; + tp += strlen (tp); + break; + } + tp = put_uint16 (in6_addr_addr16 (src, i), tp); + } + /* Was it a trailing run of 0x00's? */ + if (best.base != -1 && (best.base + best.len) == (NS_IN6ADDRSZ / NS_INT16SZ)) + *tp++ = ':'; + *tp++ = '\0'; + + return tp; +} + +static inline const char * +inet_ntop6 (const struct in6_addr *src, char *dst, socklen_t size) +{ + struct best_t best = { -1, 0 }, cur = { -1, 0 }; + + /* ind the longest run of 0x00's in src[] for :: shorthanding. 
*/ + for (int i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) + { + if (in6_addr_addr16 (src, i) == 0) { if (cur.base == -1) cur.base = i, cur.len = 1; @@ -155,45 +174,37 @@ inet_ntop6 (const u_char *src, char *dst, socklen_t size) if (best.base != -1 && best.len < 2) best.base = -1; - /* - * Format the result. - */ - tp = tmp; - for (i = 0; i < (NS_IN6ADDRSZ / NS_INT16SZ); i++) + if (size >= INET6_ADDRSTRLEN) { - /* Are we inside the best run of 0x00's? */ - if (best.base != -1 && i >= best.base && i < (best.base + best.len)) - { - if (i == best.base) - *tp++ = ':'; - continue; - } - /* Are we following an initial run of 0x00s or any real hex? */ - if (i != 0) - *tp++ = ':'; - /* Is this address an encapsulated IPv4? */ - if (i == 6 && best.base == 0 - && (best.len == 6 || (best.len == 5 && words[5] == 0xffff))) - { - if (!inet_ntop4 (src + 12, tp, sizeof tmp - (tp - tmp))) - return (NULL); - tp += strlen (tp); - break; - } - tp += SPRINTF ((tp, "%x", words[i])); + inet_ntop6_format (src, best, dst); + return dst; } - /* Was it a trailing run of 0x00's? */ - if (best.base != -1 && (best.base + best.len) == (NS_IN6ADDRSZ / NS_INT16SZ)) - *tp++ = ':'; - *tp++ = '\0'; - /* - * Check for overflow, copy, and we're done. - */ - if ((socklen_t) (tp - tmp) > size) + char tmp[INET6_ADDRSTRLEN]; + char *tp = inet_ntop6_format (src, best, tmp); + + socklen_t tmp_s = tp - tmp; + if (tmp_s > size) { __set_errno (ENOSPC); return (NULL); } - return strcpy (dst, tmp); + return memcpy (dst, tmp, tmp_s); +} + +const char * +__inet_ntop (int af, const void *src, char *dst, socklen_t size) +{ + switch (af) + { + case AF_INET: + return (inet_ntop4 (src, dst, size)); + case AF_INET6: + return (inet_ntop6 (src, dst, size)); + default: + __set_errno (EAFNOSUPPORT); + return (NULL); + } } +libc_hidden_def (__inet_ntop) +weak_alias (__inet_ntop, inet_ntop)