When binding a udp_sock to a local address and port, UDP uses
two hashes (udptable->hash and udptable->hash2) for collision
detection. The current code switches to "hash2" when
hslot->count > 10.
"hash2" is keyed by local address and local port.
"hash" is keyed by local port only.
The issue can be shown in the following bind sequence (pseudo code):
bind(fd1, "[fd00::1]:8888")
bind(fd2, "[fd00::2]:8888")
bind(fd3, "[fd00::3]:8888")
bind(fd4, "[fd00::4]:8888")
bind(fd5, "[fd00::5]:8888")
bind(fd6, "[fd00::6]:8888")
bind(fd7, "[fd00::7]:8888")
bind(fd8, "[fd00::8]:8888")
bind(fd9, "[fd00::9]:8888")
bind(fd10, "[fd00::10]:8888")
/* Correctly return -EADDRINUSE because "hash" is used
* instead of "hash2". udp_lib_lport_inuse() detects the
* conflict.
*/
bind(fail_fd, "[::]:8888")
/* After one more socket is bound to "[fd00::11]:8888",
* hslot->count exceeds 10 and "hash2" is used instead.
*/
bind(fd11, "[fd00::11]:8888")
bind(fail_fd, "[::]:8888") /* succeeds unexpectedly */
The same issue applies to the IPv4 wildcard address "0.0.0.0"
and the IPv4-mapped wildcard address "::ffff:0.0.0.0". For
example, if there are existing sockets bound to
"192.168.1.[1-11]:8888", then binding "0.0.0.0:8888" or
"[::ffff:0.0.0.0]:8888" can also miss the conflict when
hslot->count > 10.
TCP inet_csk_get_port() already has the correct check in
inet_use_bhash2_on_bind(). Rename it to
inet_use_hash2_on_bind() and move it to inet_hashtables.h
so udp.c can reuse it in this fix.
Fixes: 30fff9231fad ("udp: bind() optimisation")
Reported-by: Andrew Onyshchuk <oandrew@meta.com>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Reviewed-by: Kuniyuki Iwashima <kuniyu@google.com>
Link: https://patch.msgid.link/20260319181817.1901357-1-martin.lau@linux.dev
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
}
+static inline bool inet_use_hash2_on_bind(const struct sock *sk)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6) {
+ if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
+ return false;
+
+ if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return true;
+ }
+#endif
+ return sk->sk_rcv_saddr != htonl(INADDR_ANY);
+}
+
struct inet_bind_hashbucket *
inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port);
}
EXPORT_SYMBOL(inet_sk_get_local_port_range);
-static bool inet_use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6) {
- if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
- return false;
-
- if (!ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
- return true;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
kuid_t uid, bool relax,
bool reuseport_cb_ok, bool reuseport_ok)
* checks separately because their spinlocks have to be acquired/released
* independently of each other, to prevent possible deadlocks
*/
- if (inet_use_bhash2_on_bind(sk))
+ if (inet_use_hash2_on_bind(sk))
return tb2 && inet_bhash2_conflict(sk, tb2, uid, relax,
reuseport_cb_ok, reuseport_ok);
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- if (inet_use_bhash2_on_bind(sk)) {
+ if (inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, relax, false))
goto next_port;
}
check_bind_conflict = false;
}
- if (check_bind_conflict && inet_use_bhash2_on_bind(sk)) {
+ if (check_bind_conflict && inet_use_hash2_on_bind(sk)) {
if (inet_bhash2_addr_any_conflict(sk, port, l3mdev, true, true))
goto fail_unlock;
}
} else {
hslot = udp_hashslot(udptable, net, snum);
spin_lock_bh(&hslot->lock);
- if (hslot->count > 10) {
+ if (inet_use_hash2_on_bind(sk) && hslot->count > 10) {
int exist;
unsigned int slot2 = udp_sk(sk)->udp_portaddr_hash ^ snum;