+++ /dev/null
-From foo@baz Mon May 16 11:21:32 PDT 2016
-From: Craig Gallek <kraig@google.com>
-Date: Tue, 12 Apr 2016 13:11:25 -0400
-Subject: soreuseport: fix ordering for mixed v4/v6 sockets
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-From: Craig Gallek <kraig@google.com>
-
-[ Upstream commit d894ba18d4e449b3a7f6eb491f16c9e02933736e ]
-
-With the SO_REUSEPORT socket option, it is possible to create sockets
-in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address.
-This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on
-the AF_INET6 sockets.
-
-Prior to the commits referenced below, an incoming IPv4 packet would
-always be routed to a socket of type AF_INET when this mixed-mode was used.
-After those changes, the same packet would be routed to the most recently
-bound socket (if this happened to be an AF_INET6 socket, it would
-have an IPv4 mapped IPv6 address).
-
-The change in behavior occurred because the recent SO_REUSEPORT optimizations
-short-circuit the socket scoring logic as soon as they find a match. They
-did not take into account the scoring logic that favors AF_INET sockets
-over AF_INET6 sockets in the event of a tie.
-
-To fix this problem, this patch changes the insertion order of AF_INET
-and AF_INET6 addresses in the TCP and UDP socket lists when the sockets
-have SO_REUSEPORT set. AF_INET sockets will be inserted at the head of the
-list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at
-the tail of the list. This will force AF_INET sockets to always be
-considered first.
-
-Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
-Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection")
-
-Reported-by: Maciej Żenczykowski <maze@google.com>
-Signed-off-by: Craig Gallek <kraig@google.com>
-Signed-off-by: Eric Dumazet <edumazet@google.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/linux/rculist_nulls.h | 39 +++++++++++++++++++++++++++++++++++++++
- include/net/sock.h | 6 +++++-
- net/ipv4/udp.c | 9 +++++++--
- 3 files changed, 51 insertions(+), 3 deletions(-)
-
---- a/include/linux/rculist_nulls.h
-+++ b/include/linux/rculist_nulls.h
-@@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_
- if (!is_a_nulls(first))
- first->pprev = &n->next;
- }
-+
-+/**
-+ * hlist_nulls_add_tail_rcu - add an element to the end of an hlist_nulls
-+ * @n: the element to add to the hash list.
-+ * @h: the list to add to.
-+ *
-+ * Description:
-+ * Adds the specified element to the end of the specified hlist_nulls,
-+ * while permitting racing traversals. NOTE: tail insertion walks the
-+ * whole list to find its last node, so its cost grows with list length.
-+ *
-+ * The caller must take whatever precautions are necessary
-+ * (such as holding appropriate locks) to avoid racing
-+ * with another list-mutation primitive, such as hlist_nulls_add_head_rcu()
-+ * or hlist_nulls_del_rcu(), running on this same list.
-+ * However, it is perfectly legal to run concurrently with
-+ * the _rcu list-traversal primitives, such as
-+ * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency
-+ * problems on Alpha CPUs. Regardless of the type of CPU, the
-+ * list-traversal primitive must be guarded by rcu_read_lock().
-+ */
-+static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n,
-+ struct hlist_nulls_head *h)
-+{
-+ struct hlist_nulls_node *i, *last = NULL;
-+
-+ for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i);
-+ i = hlist_nulls_next_rcu(i))
-+ last = i;
-+
-+ if (last) {
-+ n->next = last->next;
-+ n->pprev = &last->next;
-+ rcu_assign_pointer(hlist_nulls_next_rcu(last), n);
-+ } else {
-+ hlist_nulls_add_head_rcu(n, h);
-+ }
-+}
-+
- /**
- * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type
- * @tpos: the type * to use as a loop cursor.
---- a/include/net/sock.h
-+++ b/include/net/sock.h
-@@ -649,7 +649,11 @@ static inline void sk_add_node_rcu(struc
-
- static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
- {
-- hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
-+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
-+ sk->sk_family == AF_INET6)
-+ hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list);
-+ else
-+ hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
- }
-
- static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
---- a/net/ipv4/udp.c
-+++ b/net/ipv4/udp.c
-@@ -296,8 +296,13 @@ found:
-
- hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash);
- spin_lock(&hslot2->lock);
-- hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
-- &hslot2->head);
-+ if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport &&
-+ sk->sk_family == AF_INET6)
-+ hlist_nulls_add_tail_rcu(&udp_sk(sk)->udp_portaddr_node,
-+ &hslot2->head);
-+ else
-+ hlist_nulls_add_head_rcu(&udp_sk(sk)->udp_portaddr_node,
-+ &hslot2->head);
- hslot2->count++;
- spin_unlock(&hslot2->lock);
- }