+++ /dev/null
-From foo@baz Mon Oct 9 09:32:35 CEST 2017
-From: Paolo Abeni <pabeni@redhat.com>
-Date: Thu, 28 Sep 2017 15:51:37 +0200
-Subject: udp: perform source validation for mcast early demux
-
-From: Paolo Abeni <pabeni@redhat.com>
-
-
-[ Upstream commit bc044e8db7962e727a75b591b9851ff2ac5cf846 ]
-
-The UDP early demux can leverage the rx dst cache even for
-multicast unconnected sockets.
-
-In such a scenario the ipv4 source address is validated only on
-the first packet in the given flow. After that, when we fetch
-the dst entry from the socket rx cache, we stop enforcing
-the rp_filter and we even start accepting any kind of martian
-addresses.
-
-Disabling the dst cache for unconnected multicast sockets would
-cause a large performance regression, cutting the max ingress
-throughput nearly in half.
-
-Instead we factor out a route helper to completely validate an
-skb source address for multicast packets and we call it from
-the UDP early demux for mcast packets landing on unconnected
-sockets, after successfully fetching the related cached dst entry.
-
-This still gives a measurable, but limited performance
-regression:
-
-                 rp_filter = 0   rp_filter = 1
-edmux disabled:  1182 Kpps       1127 Kpps
-edmux before:    2238 Kpps       2238 Kpps
-edmux after:     2037 Kpps       2019 Kpps
-
-The above figures are on top of current net tree.
-Applying the net-next commit 6e617de84e87 ("net: avoid a full
-fib lookup when rp_filter is disabled.") the delta with
-rp_filter == 0 will decrease even more.
-
-Fixes: 421b3885bf6d ("udp: ipv4: Add udp early demux")
-Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-Signed-off-by: David S. Miller <davem@davemloft.net>
-Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
----
- include/net/route.h | 4 +++-
- net/ipv4/route.c | 46 ++++++++++++++++++++++++++--------------------
- net/ipv4/udp.c | 13 ++++++++++++-
- 3 files changed, 41 insertions(+), 22 deletions(-)
-
---- a/include/net/route.h
-+++ b/include/net/route.h
-@@ -175,7 +175,9 @@ static inline struct rtable *ip_route_ou
- fl4->fl4_gre_key = gre_key;
- return ip_route_output_key(net, fl4);
- }
--
-+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-+ u8 tos, struct net_device *dev,
-+ struct in_device *in_dev, u32 *itag);
- int ip_route_input_noref(struct sk_buff *skb, __be32 dst, __be32 src,
- u8 tos, struct net_device *devin);
- int ip_route_input_rcu(struct sk_buff *skb, __be32 dst, __be32 src,
---- a/net/ipv4/route.c
-+++ b/net/ipv4/route.c
-@@ -1520,43 +1520,56 @@ struct rtable *rt_dst_alloc(struct net_d
- EXPORT_SYMBOL(rt_dst_alloc);
-
- /* called in rcu_read_lock() section */
--static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-- u8 tos, struct net_device *dev, int our)
-+int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-+ u8 tos, struct net_device *dev,
-+ struct in_device *in_dev, u32 *itag)
- {
-- struct rtable *rth;
-- struct in_device *in_dev = __in_dev_get_rcu(dev);
-- unsigned int flags = RTCF_MULTICAST;
-- u32 itag = 0;
- int err;
-
- /* Primary sanity checks. */
--
- if (!in_dev)
- return -EINVAL;
-
- if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) ||
- skb->protocol != htons(ETH_P_IP))
-- goto e_inval;
-+ return -EINVAL;
-
- if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev))
-- goto e_inval;
-+ return -EINVAL;
-
- if (ipv4_is_zeronet(saddr)) {
- if (!ipv4_is_local_multicast(daddr))
-- goto e_inval;
-+ return -EINVAL;
- } else {
- err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
-- in_dev, &itag);
-+ in_dev, itag);
- if (err < 0)
-- goto e_err;
-+ return err;
- }
-+ return 0;
-+}
-+
-+/* called in rcu_read_lock() section */
-+static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
-+ u8 tos, struct net_device *dev, int our)
-+{
-+ struct in_device *in_dev = __in_dev_get_rcu(dev);
-+ unsigned int flags = RTCF_MULTICAST;
-+ struct rtable *rth;
-+ u32 itag = 0;
-+ int err;
-+
-+ err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag);
-+ if (err)
-+ return err;
-+
- if (our)
- flags |= RTCF_LOCAL;
-
- rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST,
- IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false);
- if (!rth)
-- goto e_nobufs;
-+ return -ENOBUFS;
-
- #ifdef CONFIG_IP_ROUTE_CLASSID
- rth->dst.tclassid = itag;
-@@ -1572,13 +1585,6 @@ static int ip_route_input_mc(struct sk_b
-
- skb_dst_set(skb, &rth->dst);
- return 0;
--
--e_nobufs:
-- return -ENOBUFS;
--e_inval:
-- return -EINVAL;
--e_err:
-- return err;
- }
-
-
---- a/net/ipv4/udp.c
-+++ b/net/ipv4/udp.c
-@@ -2220,6 +2220,7 @@ static struct sock *__udp4_lib_demux_loo
- int udp_v4_early_demux(struct sk_buff *skb)
- {
- struct net *net = dev_net(skb->dev);
-+ struct in_device *in_dev = NULL;
- const struct iphdr *iph;
- const struct udphdr *uh;
- struct sock *sk = NULL;
-@@ -2236,7 +2237,7 @@ int udp_v4_early_demux(struct sk_buff *s
-
- if (skb->pkt_type == PACKET_BROADCAST ||
- skb->pkt_type == PACKET_MULTICAST) {
-- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
-+ in_dev = __in_dev_get_rcu(skb->dev);
-
- if (!in_dev)
- return 0;
-@@ -2266,11 +2267,21 @@ int udp_v4_early_demux(struct sk_buff *s
- if (dst)
- dst = dst_check(dst, 0);
- if (dst) {
-+ u32 itag = 0;
-+
- /* set noref for now.
- * any place which wants to hold dst has to call
- * dst_hold_safe()
- */
- skb_dst_set_noref(skb, dst);
-+
-+ /* for unconnected multicast sockets we need to validate
-+ * the source on each packet
-+ */
-+ if (!inet_sk(sk)->inet_daddr && in_dev)
-+ return ip_mc_validate_source(skb, iph->daddr,
-+ iph->saddr, iph->tos,
-+ skb->dev, in_dev, &itag);
- }
- return 0;
- }