]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.13-stable patches
authorGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Nov 2017 16:25:58 +0000 (17:25 +0100)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 15 Nov 2017 16:25:58 +0000 (17:25 +0100)
added patches:
geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch
gso-fix-payload-length-when-gso_size-is-zero.patch
ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch
ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch
ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
ipv4-fix-traffic-triggered-ipsec-connections.patch
ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
ipv6-fix-traffic-triggered-ipsec-connections.patch
ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch
l2tp-hold-tunnel-in-pppol2tp_connect.patch
macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch
net-bridge-fix-returning-of-vlan-range-op-errors.patch
net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
net-dsa-check-master-device-before-put.patch
net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch
net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch
net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch
net_sched-avoid-matching-qdisc-with-zero-handle.patch
netlink-do-not-set-cb_running-if-dump-s-start-errs.patch
netlink-fix-netlink_ack-extack-race.patch
packet-avoid-panic-in-packet_getsockopt.patch
ppp-fix-race-in-ppp-device-destruction.patch
sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch
sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch
soreuseport-fix-initialization-race.patch
tap-double-free-in-error-path-in-tap_open.patch
tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch
tcp-dccp-fix-ireq-opt-races.patch
tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
tun-call-dev_get_valid_name-before-register_netdevice.patch
tun-tap-sanitize-tunsetsndbuf-input.patch

38 files changed:
queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch [new file with mode: 0644]
queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch [new file with mode: 0644]
queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch [new file with mode: 0644]
queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch [new file with mode: 0644]
queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch [new file with mode: 0644]
queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch [new file with mode: 0644]
queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch [new file with mode: 0644]
queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch [new file with mode: 0644]
queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch [new file with mode: 0644]
queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch [new file with mode: 0644]
queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch [new file with mode: 0644]
queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch [new file with mode: 0644]
queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch [new file with mode: 0644]
queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch [new file with mode: 0644]
queue-4.13/net-dsa-check-master-device-before-put.patch [new file with mode: 0644]
queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch [new file with mode: 0644]
queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch [new file with mode: 0644]
queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch [new file with mode: 0644]
queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch [new file with mode: 0644]
queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch [new file with mode: 0644]
queue-4.13/netlink-fix-netlink_ack-extack-race.patch [new file with mode: 0644]
queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch [new file with mode: 0644]
queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch [new file with mode: 0644]
queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch [new file with mode: 0644]
queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch [new file with mode: 0644]
queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch [new file with mode: 0644]
queue-4.13/series [new file with mode: 0644]
queue-4.13/soreuseport-fix-initialization-race.patch [new file with mode: 0644]
queue-4.13/tap-double-free-in-error-path-in-tap_open.patch [new file with mode: 0644]
queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch [new file with mode: 0644]
queue-4.13/tcp-dccp-fix-ireq-opt-races.patch [new file with mode: 0644]
queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch [new file with mode: 0644]
queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch [new file with mode: 0644]
queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch [new file with mode: 0644]
queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch [new file with mode: 0644]
queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch [new file with mode: 0644]
queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch [new file with mode: 0644]
queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch [new file with mode: 0644]

diff --git a/queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch b/queue-4.13/geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch
new file mode 100644 (file)
index 0000000..76a4cea
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Stefano Brivio <sbrivio@redhat.com>
+Date: Thu, 19 Oct 2017 13:31:28 +0200
+Subject: geneve: Fix function matching VNI and tunnel ID on big-endian
+
+From: Stefano Brivio <sbrivio@redhat.com>
+
+
+[ Upstream commit 772e97b57a4aa00170ad505a40ffad31d987ce1d ]
+
+On big-endian machines, functions converting between tunnel ID
+and VNI use the three LSBs of tunnel ID storage to map VNI.
+
+The comparison function eq_tun_id_and_vni(), on the other hand,
+attempted to map the VNI from the three MSBs. Fix it by using
+the same check implemented on LE, which maps VNI from the three
+LSBs of tunnel ID.
+
+Fixes: 2e0b26e10352 ("geneve: Optimize geneve device lookup.")
+Signed-off-by: Stefano Brivio <sbrivio@redhat.com>
+Reviewed-by: Jakub Sitnicki <jkbs@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/geneve.c |    6 ------
+ 1 file changed, 6 deletions(-)
+
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -113,13 +113,7 @@ static void tunnel_id_to_vni(__be64 tun_
+ static bool eq_tun_id_and_vni(u8 *tun_id, u8 *vni)
+ {
+-#ifdef __BIG_ENDIAN
+-      return (vni[0] == tun_id[2]) &&
+-             (vni[1] == tun_id[1]) &&
+-             (vni[2] == tun_id[0]);
+-#else
+       return !memcmp(vni, &tun_id[5], 3);
+-#endif
+ }
+ static sa_family_t geneve_get_sk_family(struct geneve_sock *gs)
diff --git a/queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch b/queue-4.13/gso-fix-payload-length-when-gso_size-is-zero.patch
new file mode 100644 (file)
index 0000000..a1fa043
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Fri, 6 Oct 2017 19:02:35 +0300
+Subject: gso: fix payload length when gso_size is zero
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 3d0241d57c7b25bb75ac9d7a62753642264fdbce ]
+
+When gso_size reset to zero for the tail segment in skb_segment(), later
+in ipv6_gso_segment(), __skb_udp_tunnel_segment() and gre_gso_segment()
+we will get incorrect results (payload length, pcsum) for that segment.
+inet_gso_segment() already has a check for gso_size before calculating
+payload.
+
+The issue was found with LTP vxlan & gre tests over ixgbe NIC.
+
+Fixes: 07b26c9454a2 ("gso: Support partial splitting at the frag_list pointer")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Acked-by: Alexander Duyck <alexander.h.duyck@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/gre_offload.c |    2 +-
+ net/ipv4/udp_offload.c |    2 +-
+ net/ipv6/ip6_offload.c |    2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+--- a/net/ipv4/gre_offload.c
++++ b/net/ipv4/gre_offload.c
+@@ -98,7 +98,7 @@ static struct sk_buff *gre_gso_segment(s
+               greh = (struct gre_base_hdr *)skb_transport_header(skb);
+               pcsum = (__sum16 *)(greh + 1);
+-              if (gso_partial) {
++              if (gso_partial && skb_is_gso(skb)) {
+                       unsigned int partial_adj;
+                       /* Adjust checksum to account for the fact that
+--- a/net/ipv4/udp_offload.c
++++ b/net/ipv4/udp_offload.c
+@@ -122,7 +122,7 @@ static struct sk_buff *__skb_udp_tunnel_
+                * will be using a length value equal to only one MSS sized
+                * segment instead of the entire frame.
+                */
+-              if (gso_partial) {
++              if (gso_partial && skb_is_gso(skb)) {
+                       uh->len = htons(skb_shinfo(skb)->gso_size +
+                                       SKB_GSO_CB(skb)->data_offset +
+                                       skb->head - (unsigned char *)uh);
+--- a/net/ipv6/ip6_offload.c
++++ b/net/ipv6/ip6_offload.c
+@@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(
+       for (skb = segs; skb; skb = skb->next) {
+               ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff);
+-              if (gso_partial)
++              if (gso_partial && skb_is_gso(skb))
+                       payload_len = skb_shinfo(skb)->gso_size +
+                                     SKB_GSO_CB(skb)->data_offset +
+                                     skb->head - (unsigned char *)(ipv6h + 1);
diff --git a/queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch b/queue-4.13/ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch
new file mode 100644 (file)
index 0000000..b875074
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:23:27 +0800
+Subject: ip6_gre: only increase err_count for some certain type icmpv6 in ip6gre_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f8d20b46ce55cf40afb30dcef6d9288f7ef46d9b ]
+
+The similar fix in patch 'ipip: only increase err_count for some
+certain type icmp in ipip_err' is needed for ip6gre_err.
+
+In Jianlin's case, udp netperf broke even when receiving a TooBig
+icmpv6 packet.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   11 +++++++----
+ 1 file changed, 7 insertions(+), 4 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -408,13 +408,16 @@ static void ip6gre_err(struct sk_buff *s
+       case ICMPV6_DEST_UNREACH:
+               net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n",
+                                   t->parms.name);
+-              break;
++              if (code != ICMPV6_PORT_UNREACH)
++                      break;
++              return;
+       case ICMPV6_TIME_EXCEED:
+               if (code == ICMPV6_EXC_HOPLIMIT) {
+                       net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n",
+                                           t->parms.name);
++                      break;
+               }
+-              break;
++              return;
+       case ICMPV6_PARAMPROB:
+               teli = 0;
+               if (code == ICMPV6_HDR_FIELD)
+@@ -430,7 +433,7 @@ static void ip6gre_err(struct sk_buff *s
+                       net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n",
+                                           t->parms.name);
+               }
+-              break;
++              return;
+       case ICMPV6_PKT_TOOBIG:
+               mtu = be32_to_cpu(info) - offset - t->tun_hlen;
+               if (t->dev->type == ARPHRD_ETHER)
+@@ -438,7 +441,7 @@ static void ip6gre_err(struct sk_buff *s
+               if (mtu < IPV6_MIN_MTU)
+                       mtu = IPV6_MIN_MTU;
+               t->dev->mtu = mtu;
+-              break;
++              return;
+       }
+       if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO))
diff --git a/queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch b/queue-4.13/ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch
new file mode 100644 (file)
index 0000000..9325540
--- /dev/null
@@ -0,0 +1,68 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:27:17 +0800
+Subject: ip6_gre: update dst pmtu if dev mtu has been updated by toobig in __gre6_xmit
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 8aec4959d832bae0889a8e2f348973b5e4abffef ]
+
+When receiving a Toobig icmpv6 packet, ip6gre_err would just set
+tunnel dev's mtu, that's not enough. For skb_dst(skb)'s pmtu may
+still be using the old value, it has no chance to be updated with
+tunnel dev's mtu.
+
+Jianlin found this issue by reducing route's mtu while running
+netperf, the performance went to 0.
+
+ip6ip6 and ip4ip6 tunnel can work well with this, as they lookup
+the upper dst and update_pmtu it's pmtu or icmpv6_send a Toobig
+to upper socket after setting tunnel dev's mtu.
+
+We couldn't do that for ip6_gre, as gre's inner packet could be
+any protocol, it's difficult to handle them (like lookup upper
+dst) in a good way.
+
+So this patch is to fix it by updating skb_dst(skb)'s pmtu when
+dev->mtu < skb_dst(skb)'s pmtu in tx path. It's safe to do this
+update there, as usually dev->mtu <= skb_dst(skb)'s pmtu and no
+performance regression can be caused by this.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -503,8 +503,8 @@ static netdev_tx_t __gre6_xmit(struct sk
+                              __u32 *pmtu, __be16 proto)
+ {
+       struct ip6_tnl *tunnel = netdev_priv(dev);
+-      __be16 protocol = (dev->type == ARPHRD_ETHER) ?
+-                        htons(ETH_P_TEB) : proto;
++      struct dst_entry *dst = skb_dst(skb);
++      __be16 protocol;
+       if (dev->type == ARPHRD_ETHER)
+               IPCB(skb)->flags = 0;
+@@ -518,9 +518,14 @@ static netdev_tx_t __gre6_xmit(struct sk
+               tunnel->o_seqno++;
+       /* Push GRE header. */
++      protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto;
+       gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags,
+                        protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno));
++      /* TooBig packet may have updated dst->dev's mtu */
++      if (dst && dst_mtu(dst) > dst->dev->mtu)
++              dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
++
+       return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
+                           NEXTHDR_GRE);
+ }
diff --git a/queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch b/queue-4.13/ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
new file mode 100644 (file)
index 0000000..59bee6b
--- /dev/null
@@ -0,0 +1,128 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 26 Oct 2017 19:19:56 +0800
+Subject: ipip: only increase err_count for some certain type icmp in ipip_err
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit f3594f0a7ea36661d7fd942facd7f31a64245f1a ]
+
+t->err_count is used to count the link failure on tunnel and an err
+will be reported to user socket in tx path if t->err_count is not 0.
+udp socket could even return EHOSTUNREACH to users.
+
+Since commit fd58156e456d ("IPIP: Use ip-tunneling code.") removed
+the 'switch check' for icmp type in ipip_err(), err_count would be
+increased by the icmp packet with ICMP_EXC_FRAGTIME code. an link
+failure would be reported out due to this.
+
+In Jianlin's case, when receiving ICMP_EXC_FRAGTIME a icmp packet,
+udp netperf failed with the err:
+  send_data: data send error: No route to host (errno 113)
+
+We expect this error reported from tunnel to socket when receiving
+some certain type icmp, but not ICMP_EXC_FRAGTIME, ICMP_SR_FAILED
+or ICMP_PARAMETERPROB ones.
+
+This patch is to bring 'switch check' for icmp type back to ipip_err
+so that it only reports link failure for the right type icmp, just as
+in ipgre_err() and ipip6_err().
+
+Fixes: fd58156e456d ("IPIP: Use ip-tunneling code.")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ipip.c |   59 +++++++++++++++++++++++++++++++++++++++-----------------
+ 1 file changed, 42 insertions(+), 17 deletions(-)
+
+--- a/net/ipv4/ipip.c
++++ b/net/ipv4/ipip.c
+@@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_op
+ static int ipip_err(struct sk_buff *skb, u32 info)
+ {
+-
+-/* All the routers (except for Linux) return only
+-   8 bytes of packet payload. It means, that precise relaying of
+-   ICMP in the real Internet is absolutely infeasible.
+- */
++      /* All the routers (except for Linux) return only
++       * 8 bytes of packet payload. It means, that precise relaying of
++       * ICMP in the real Internet is absolutely infeasible.
++       */
+       struct net *net = dev_net(skb->dev);
+       struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);
+       const struct iphdr *iph = (const struct iphdr *)skb->data;
+-      struct ip_tunnel *t;
+-      int err;
+       const int type = icmp_hdr(skb)->type;
+       const int code = icmp_hdr(skb)->code;
++      struct ip_tunnel *t;
++      int err = 0;
++
++      switch (type) {
++      case ICMP_DEST_UNREACH:
++              switch (code) {
++              case ICMP_SR_FAILED:
++                      /* Impossible event. */
++                      goto out;
++              default:
++                      /* All others are translated to HOST_UNREACH.
++                       * rfc2003 contains "deep thoughts" about NET_UNREACH,
++                       * I believe they are just ether pollution. --ANK
++                       */
++                      break;
++              }
++              break;
++
++      case ICMP_TIME_EXCEEDED:
++              if (code != ICMP_EXC_TTL)
++                      goto out;
++              break;
++
++      case ICMP_REDIRECT:
++              break;
++
++      default:
++              goto out;
++      }
+-      err = -ENOENT;
+       t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+                            iph->daddr, iph->saddr, 0);
+-      if (!t)
++      if (!t) {
++              err = -ENOENT;
+               goto out;
++      }
+       if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+-              ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+-                               t->parms.link, 0, iph->protocol, 0);
+-              err = 0;
++              ipv4_update_pmtu(skb, net, info, t->parms.link, 0,
++                               iph->protocol, 0);
+               goto out;
+       }
+       if (type == ICMP_REDIRECT) {
+-              ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,
+-                            iph->protocol, 0);
+-              err = 0;
++              ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0);
+               goto out;
+       }
+-      if (t->parms.iph.daddr == 0)
++      if (t->parms.iph.daddr == 0) {
++              err = -ENOENT;
+               goto out;
++      }
+-      err = 0;
+       if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
+               goto out;
diff --git a/queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch b/queue-4.13/ipv4-fix-traffic-triggered-ipsec-connections.patch
new file mode 100644 (file)
index 0000000..20058fe
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Steffen Klassert <steffen.klassert@secunet.com>
+Date: Mon, 9 Oct 2017 08:43:55 +0200
+Subject: ipv4: Fix traffic triggered IPsec connections.
+
+From: Steffen Klassert <steffen.klassert@secunet.com>
+
+
+[ Upstream commit 6c0e7284d89995877740d8a26c3e99a937312a3c ]
+
+A recent patch removed the dst_free() on the allocated
+dst_entry in ipv4_blackhole_route(). The dst_free() marked the
+dst_entry as dead and added it to the gc list. I.e. it was setup
+for a one time usage. As a result we may now have a blackhole
+route cached at a socket on some IPsec scenarios. This makes the
+connection unusable.
+
+Fix this by marking the dst_entry directly at allocation time
+as 'dead', so it is used only once.
+
+Fixes: b838d5e1c5b6 ("ipv4: mark DST_NOGC and remove the operation of dst_free()")
+Reported-by: Tobias Brunner <tobias@strongswan.org>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -2507,7 +2507,7 @@ struct dst_entry *ipv4_blackhole_route(s
+       struct rtable *ort = (struct rtable *) dst_orig;
+       struct rtable *rt;
+-      rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0);
++      rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0);
+       if (rt) {
+               struct dst_entry *new = &rt->dst;
diff --git a/queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch b/queue-4.13/ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
new file mode 100644 (file)
index 0000000..8bb2864
--- /dev/null
@@ -0,0 +1,95 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Oct 2017 22:47:09 -0700
+Subject: ipv6: addrconf: increment ifp refcount before ipv6_del_addr()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit e669b86945478b3d90d2d87e3793a6eed06d332f ]
+
+In the (unlikely) event fixup_permanent_addr() returns a failure,
+addrconf_permanent_addr() calls ipv6_del_addr() without the
+mandatory call to in6_ifa_hold(), leading to a refcount error,
+spotted by syzkaller :
+
+WARNING: CPU: 1 PID: 3142 at lib/refcount.c:227 refcount_dec+0x4c/0x50
+lib/refcount.c:227
+Kernel panic - not syncing: panic_on_warn set ...
+
+CPU: 1 PID: 3142 Comm: ip Not tainted 4.14.0-rc4-next-20171009+ #33
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS
+Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ panic+0x1e4/0x41c kernel/panic.c:181
+ __warn+0x1c4/0x1e0 kernel/panic.c:544
+ report_bug+0x211/0x2d0 lib/bug.c:183
+ fixup_bug+0x40/0x90 arch/x86/kernel/traps.c:178
+ do_trap_no_signal arch/x86/kernel/traps.c:212 [inline]
+ do_trap+0x260/0x390 arch/x86/kernel/traps.c:261
+ do_error_trap+0x120/0x390 arch/x86/kernel/traps.c:298
+ do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:311
+ invalid_op+0x18/0x20 arch/x86/entry/entry_64.S:905
+RIP: 0010:refcount_dec+0x4c/0x50 lib/refcount.c:227
+RSP: 0018:ffff8801ca49e680 EFLAGS: 00010286
+RAX: 000000000000002c RBX: ffff8801d07cfcdc RCX: 0000000000000000
+RDX: 000000000000002c RSI: 1ffff10039493c90 RDI: ffffed0039493cc4
+RBP: ffff8801ca49e688 R08: ffff8801ca49dd70 R09: 0000000000000000
+R10: ffff8801ca49df58 R11: 0000000000000000 R12: 1ffff10039493cd9
+R13: ffff8801ca49e6e8 R14: ffff8801ca49e7e8 R15: ffff8801d07cfcdc
+ __in6_ifa_put include/net/addrconf.h:369 [inline]
+ ipv6_del_addr+0x42b/0xb60 net/ipv6/addrconf.c:1208
+ addrconf_permanent_addr net/ipv6/addrconf.c:3327 [inline]
+ addrconf_notify+0x1c66/0x2190 net/ipv6/addrconf.c:3393
+ notifier_call_chain+0x136/0x2c0 kernel/notifier.c:93
+ __raw_notifier_call_chain kernel/notifier.c:394 [inline]
+ raw_notifier_call_chain+0x2d/0x40 kernel/notifier.c:401
+ call_netdevice_notifiers_info+0x32/0x60 net/core/dev.c:1697
+ call_netdevice_notifiers net/core/dev.c:1715 [inline]
+ __dev_notify_flags+0x15d/0x430 net/core/dev.c:6843
+ dev_change_flags+0xf5/0x140 net/core/dev.c:6879
+ do_setlink+0xa1b/0x38e0 net/core/rtnetlink.c:2113
+ rtnl_newlink+0xf0d/0x1a40 net/core/rtnetlink.c:2661
+ rtnetlink_rcv_msg+0x733/0x1090 net/core/rtnetlink.c:4301
+ netlink_rcv_skb+0x216/0x440 net/netlink/af_netlink.c:2408
+ rtnetlink_rcv+0x1c/0x20 net/core/rtnetlink.c:4313
+ netlink_unicast_kernel net/netlink/af_netlink.c:1273 [inline]
+ netlink_unicast+0x4e8/0x6f0 net/netlink/af_netlink.c:1299
+ netlink_sendmsg+0xa4a/0xe70 net/netlink/af_netlink.c:1862
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ ___sys_sendmsg+0x75b/0x8a0 net/socket.c:2049
+ __sys_sendmsg+0xe5/0x210 net/socket.c:2083
+ SYSC_sendmsg net/socket.c:2094 [inline]
+ SyS_sendmsg+0x2d/0x50 net/socket.c:2090
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x7fa9174d3320
+RSP: 002b:00007ffe302ae9e8 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
+RAX: ffffffffffffffda RBX: 00007ffe302b2ae0 RCX: 00007fa9174d3320
+RDX: 0000000000000000 RSI: 00007ffe302aea20 RDI: 0000000000000016
+RBP: 0000000000000082 R08: 0000000000000000 R09: 000000000000000f
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007ffe302b32a0
+R13: 0000000000000000 R14: 00007ffe302b2ab8 R15: 00007ffe302b32b8
+
+Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: David Ahern <dsahern@gmail.com>
+Acked-by: David Ahern <dsahern@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/addrconf.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/addrconf.c
++++ b/net/ipv6/addrconf.c
+@@ -3367,6 +3367,7 @@ static void addrconf_permanent_addr(stru
+               if ((ifp->flags & IFA_F_PERMANENT) &&
+                   fixup_permanent_addr(idev, ifp) < 0) {
+                       write_unlock_bh(&idev->lock);
++                      in6_ifa_hold(ifp);
+                       ipv6_del_addr(ifp);
+                       write_lock_bh(&idev->lock);
diff --git a/queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch b/queue-4.13/ipv6-fix-traffic-triggered-ipsec-connections.patch
new file mode 100644 (file)
index 0000000..d44d1cf
--- /dev/null
@@ -0,0 +1,40 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Steffen Klassert <steffen.klassert@secunet.com>
+Date: Mon, 9 Oct 2017 08:39:43 +0200
+Subject: ipv6: Fix traffic triggered IPsec connections.
+
+From: Steffen Klassert <steffen.klassert@secunet.com>
+
+
+[ Upstream commit 62cf27e52b8c9a39066172ca6b6134cb5eaa9450 ]
+
+A recent patch removed the dst_free() on the allocated
+dst_entry in ipv6_blackhole_route(). The dst_free() marked
+the dst_entry as dead and added it to the gc list. I.e. it
+was setup for a one time usage. As a result we may now have
+a blackhole route cached at a socket on some IPsec scenarios.
+This makes the connection unusable.
+
+Fix this by marking the dst_entry directly at allocation time
+as 'dead', so it is used only once.
+
+Fixes: 587fea741134 ("ipv6: mark DST_NOGC and remove the operation of dst_free()")
+Reported-by: Tobias Brunner <tobias@strongswan.org>
+Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/route.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1251,7 +1251,7 @@ struct dst_entry *ip6_blackhole_route(st
+       struct dst_entry *new = NULL;
+       rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1,
+-                     DST_OBSOLETE_NONE, 0);
++                     DST_OBSOLETE_DEAD, 0);
+       if (rt) {
+               rt6_info_init(rt);
diff --git a/queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch b/queue-4.13/ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
new file mode 100644 (file)
index 0000000..dd3ff4b
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sat, 21 Oct 2017 12:26:23 -0700
+Subject: ipv6: flowlabel: do not leave opt->tot_len with garbage
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 864e2a1f8aac05effac6063ce316b480facb46ff ]
+
+When syzkaller team brought us a C repro for the crash [1] that
+had been reported many times in the past, I finally could find
+the root cause.
+
+If FlowLabel info is merged by fl6_merge_options(), we leave
+part of the opt_space storage provided by udp/raw/l2tp with random value
+in opt_space.tot_len, unless a control message was provided at sendmsg()
+time.
+
+Then ip6_setup_cork() would use this random value to perform a kzalloc()
+call. Undefined behavior and crashes.
+
+Fix is to properly set tot_len in fl6_merge_options()
+
+At the same time, we can also avoid consuming memory and cpu cycles
+to clear it, if every option is copied via a kmemdup(). This is the
+change in ip6_setup_cork().
+
+[1]
+kasan: CONFIG_KASAN_INLINE enabled
+kasan: GPF could be caused by NULL-ptr deref or user memory access
+general protection fault: 0000 [#1] SMP KASAN
+Dumping ftrace buffer:
+   (ftrace buffer empty)
+Modules linked in:
+CPU: 0 PID: 6613 Comm: syz-executor0 Not tainted 4.14.0-rc4+ #127
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+task: ffff8801cb64a100 task.stack: ffff8801cc350000
+RIP: 0010:ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168
+RSP: 0018:ffff8801cc357550 EFLAGS: 00010203
+RAX: dffffc0000000000 RBX: ffff8801cc357748 RCX: 0000000000000010
+RDX: 0000000000000002 RSI: ffffffff842bd1d9 RDI: 0000000000000014
+RBP: ffff8801cc357620 R08: ffff8801cb17f380 R09: ffff8801cc357b10
+R10: ffff8801cb64a100 R11: 0000000000000000 R12: ffff8801cc357ab0
+R13: ffff8801cc357b10 R14: 0000000000000000 R15: ffff8801c3bbf0c0
+FS:  00007f9c5c459700(0000) GS:ffff8801db200000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000000020324000 CR3: 00000001d1cf2000 CR4: 00000000001406f0
+DR0: 0000000020001010 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000600
+Call Trace:
+ ip6_make_skb+0x282/0x530 net/ipv6/ip6_output.c:1729
+ udpv6_sendmsg+0x2769/0x3380 net/ipv6/udp.c:1340
+ inet_sendmsg+0x11f/0x5e0 net/ipv4/af_inet.c:762
+ sock_sendmsg_nosec net/socket.c:633 [inline]
+ sock_sendmsg+0xca/0x110 net/socket.c:643
+ SYSC_sendto+0x358/0x5a0 net/socket.c:1750
+ SyS_sendto+0x40/0x50 net/socket.c:1718
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x4520a9
+RSP: 002b:00007f9c5c458c08 EFLAGS: 00000216 ORIG_RAX: 000000000000002c
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 00000000004520a9
+RDX: 0000000000000001 RSI: 0000000020fd1000 RDI: 0000000000000016
+RBP: 0000000000000086 R08: 0000000020e0afe4 R09: 000000000000001c
+R10: 0000000000000000 R11: 0000000000000216 R12: 00000000004bb1ee
+R13: 00000000ffffffff R14: 0000000000000016 R15: 0000000000000029
+Code: e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85 ea 0f 00 00 48 8d 79 04 48 b8 00 00 00 00 00 fc ff df 45 8b 74 24 04 48 89 fa 48 c1 ea 03 <0f> b6 14 02 48 89 f8 83 e0 07 83 c0 03 38 d0 7c 08 84 d2 0f 85
+RIP: ip6_setup_cork+0x274/0x15c0 net/ipv6/ip6_output.c:1168 RSP: ffff8801cc357550
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_flowlabel.c |    1 +
+ net/ipv6/ip6_output.c    |    4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_flowlabel.c
++++ b/net/ipv6/ip6_flowlabel.c
+@@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options
+       }
+       opt_space->dst1opt = fopt->dst1opt;
+       opt_space->opt_flen = fopt->opt_flen;
++      opt_space->tot_len = fopt->tot_len;
+       return opt_space;
+ }
+ EXPORT_SYMBOL_GPL(fl6_merge_options);
+--- a/net/ipv6/ip6_output.c
++++ b/net/ipv6/ip6_output.c
+@@ -1224,11 +1224,11 @@ static int ip6_setup_cork(struct sock *s
+               if (WARN_ON(v6_cork->opt))
+                       return -EINVAL;
+-              v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation);
++              v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
+               if (unlikely(!v6_cork->opt))
+                       return -ENOBUFS;
+-              v6_cork->opt->tot_len = opt->tot_len;
++              v6_cork->opt->tot_len = sizeof(*opt);
+               v6_cork->opt->opt_flen = opt->opt_flen;
+               v6_cork->opt->opt_nflen = opt->opt_nflen;
diff --git a/queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch b/queue-4.13/l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch
new file mode 100644 (file)
index 0000000..cecd2a3
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 13 Oct 2017 19:22:35 +0200
+Subject: l2tp: check ps->sock before running pppol2tp_session_ioctl()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 5903f594935a3841137c86b9d5b75143a5b7121c ]
+
+When pppol2tp_session_ioctl() is called by pppol2tp_tunnel_ioctl(),
+the session may be unconnected. That is, it was created by
+pppol2tp_session_create() and hasn't been connected with
+pppol2tp_connect(). In this case, ps->sock is NULL, so we need to check
+for this case in order to avoid dereferencing a NULL pointer.
+
+Fixes: 309795f4bec2 ("l2tp: Add netlink control API for L2TP")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -993,6 +993,9 @@ static int pppol2tp_session_ioctl(struct
+                session->name, cmd, arg);
+       sk = ps->sock;
++      if (!sk)
++              return -EBADR;
++
+       sock_hold(sk);
+       switch (cmd) {
diff --git a/queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch b/queue-4.13/l2tp-hold-tunnel-in-pppol2tp_connect.patch
new file mode 100644 (file)
index 0000000..8f2dd23
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Mon, 30 Oct 2017 17:58:58 +0100
+Subject: l2tp: hold tunnel in pppol2tp_connect()
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit f9e56baf03f9d36043a78f16e3e8b2cfd211e09e ]
+
+Use l2tp_tunnel_get() in pppol2tp_connect() to ensure the tunnel isn't
+going to disappear while processing the rest of the function.
+
+Fixes: fd558d186df2 ("l2tp: Split pppol2tp patch into separate l2tp and ppp parts")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_ppp.c |    7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/net/l2tp/l2tp_ppp.c
++++ b/net/l2tp/l2tp_ppp.c
+@@ -584,6 +584,7 @@ static int pppol2tp_connect(struct socke
+       u32 tunnel_id, peer_tunnel_id;
+       u32 session_id, peer_session_id;
+       bool drop_refcnt = false;
++      bool drop_tunnel = false;
+       int ver = 2;
+       int fd;
+@@ -652,7 +653,9 @@ static int pppol2tp_connect(struct socke
+       if (tunnel_id == 0)
+               goto end;
+-      tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
++      tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id);
++      if (tunnel)
++              drop_tunnel = true;
+       /* Special case: create tunnel context if session_id and
+        * peer_session_id is 0. Otherwise look up tunnel using supplied
+@@ -781,6 +784,8 @@ out_no_ppp:
+ end:
+       if (drop_refcnt)
+               l2tp_session_dec_refcount(session);
++      if (drop_tunnel)
++              l2tp_tunnel_dec_refcount(tunnel);
+       release_sock(sk);
+       return error;
diff --git a/queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch b/queue-4.13/macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch
new file mode 100644 (file)
index 0000000..e60aea4
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 10 Oct 2017 17:07:12 +0200
+Subject: macsec: fix memory leaks when skb_to_sgvec fails
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 5aba2ba5030b66a6f8c93049b718556f9aacd7c6 ]
+
+Fixes: cda7ea690350 ("macsec: check return value of skb_to_sgvec always")
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/macsec.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/net/macsec.c
++++ b/drivers/net/macsec.c
+@@ -742,6 +742,7 @@ static struct sk_buff *macsec_encrypt(st
+       sg_init_table(sg, ret);
+       ret = skb_to_sgvec(skb, sg, 0, skb->len);
+       if (unlikely(ret < 0)) {
++              aead_request_free(req);
+               macsec_txsa_put(tx_sa);
+               kfree_skb(skb);
+               return ERR_PTR(ret);
+@@ -954,6 +955,7 @@ static struct sk_buff *macsec_decrypt(st
+       sg_init_table(sg, ret);
+       ret = skb_to_sgvec(skb, sg, 0, skb->len);
+       if (unlikely(ret < 0)) {
++              aead_request_free(req);
+               kfree_skb(skb);
+               return ERR_PTR(ret);
+       }
diff --git a/queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch b/queue-4.13/net-bridge-fix-returning-of-vlan-range-op-errors.patch
new file mode 100644 (file)
index 0000000..b55fbda
--- /dev/null
@@ -0,0 +1,34 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Thu, 19 Oct 2017 20:17:32 +0300
+Subject: net: bridge: fix returning of vlan range op errors
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit 66c54517540cedf5a22911c6b7f5c7d8b5d1e1be ]
+
+When vlan tunnels were introduced, vlan range errors got silently
+dropped and instead 0 was returned always. Restore the previous
+behaviour and return errors to user-space.
+
+Fixes: efa5356b0d97 ("bridge: per vlan dst_metadata netlink support")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/bridge/br_netlink.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/bridge/br_netlink.c
++++ b/net/bridge/br_netlink.c
+@@ -573,7 +573,7 @@ static int br_process_vlan_info(struct n
+               }
+               *vinfo_last = NULL;
+-              return 0;
++              return err;
+       }
+       return br_vlan_info(br, p, cmd, vinfo_curr);
diff --git a/queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch b/queue-4.13/net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
new file mode 100644 (file)
index 0000000..ef4448b
--- /dev/null
@@ -0,0 +1,44 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 10 Oct 2017 19:12:33 -0700
+Subject: net: call cgroup_sk_alloc() earlier in sk_clone_lock()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c0576e3975084d4699b7bfef578613fb8e1144f6 ]
+
+If for some reason, the newly allocated child need to be freed,
+we will call cgroup_put() (via sk_free_unlock_clone()) while the
+corresponding cgroup_get() was not yet done, and we will free memory
+too soon.
+
+Fixes: d979a39d7242 ("cgroup: duplicate cgroup reference when cloning sockets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Johannes Weiner <hannes@cmpxchg.org>
+Cc: Tejun Heo <tj@kernel.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1674,6 +1674,7 @@ struct sock *sk_clone_lock(const struct
+               newsk->sk_userlocks     = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
+               sock_reset_flag(newsk, SOCK_DONE);
++              cgroup_sk_alloc(&newsk->sk_cgrp_data);
+               rcu_read_lock();
+               filter = rcu_dereference(sk->sk_filter);
+@@ -1706,8 +1707,6 @@ struct sock *sk_clone_lock(const struct
+               atomic64_set(&newsk->sk_cookie, 0);
+               mem_cgroup_sk_alloc(newsk);
+-              cgroup_sk_alloc(&newsk->sk_cgrp_data);
+-
+               /*
+                * Before updating sk_refcnt, we must commit prior changes to memory
+                * (Documentation/RCU/rculist_nulls.txt for details)
diff --git a/queue-4.13/net-dsa-check-master-device-before-put.patch b/queue-4.13/net-dsa-check-master-device-before-put.patch
new file mode 100644 (file)
index 0000000..935eb5d
--- /dev/null
@@ -0,0 +1,43 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Date: Tue, 24 Oct 2017 16:37:19 -0400
+Subject: net: dsa: check master device before put
+
+From: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+
+
+[ Upstream commit 3eb8feeb1708c7dbfd2e97df92a2a407c116606e ]
+
+In the case of pdata, the dsa_cpu_parse function calls dev_put() before
+making sure it isn't NULL. Fix this.
+
+Fixes: 71e0bbde0d88 ("net: dsa: Add support for platform data")
+Signed-off-by: Vivien Didelot <vivien.didelot@savoirfairelinux.com>
+Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/dsa2.c |    7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+--- a/net/dsa/dsa2.c
++++ b/net/dsa/dsa2.c
+@@ -496,14 +496,15 @@ static int dsa_cpu_parse(struct dsa_port
+               if (!ethernet)
+                       return -EINVAL;
+               ethernet_dev = of_find_net_device_by_node(ethernet);
++              if (!ethernet_dev)
++                      return -EPROBE_DEFER;
+       } else {
+               ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]);
++              if (!ethernet_dev)
++                      return -EPROBE_DEFER;
+               dev_put(ethernet_dev);
+       }
+-      if (!ethernet_dev)
+-              return -EPROBE_DEFER;
+-
+       if (!dst->cpu_dp) {
+               dst->cpu_dp = port;
+               dst->cpu_dp->netdev = ethernet_dev;
diff --git a/queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch b/queue-4.13/net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch
new file mode 100644 (file)
index 0000000..b452695
--- /dev/null
@@ -0,0 +1,51 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Moshe Shemesh <moshe@mellanox.com>
+Date: Thu, 19 Oct 2017 14:14:29 +0300
+Subject: net/mlx5: Fix health work queue spin lock to IRQ safe
+
+From: Moshe Shemesh <moshe@mellanox.com>
+
+
+[ Upstream commit 6377ed0bbae6fa28853e1679d068a9106c8a8908 ]
+
+spin_lock/unlock of health->wq_lock should be IRQ safe.
+It was changed to spin_lock_irqsave since adding commit 0179720d6be2
+("net/mlx5: Introduce trigger_health_work function") which uses
+spin_lock from asynchronous event (IRQ) context.
+Thus, all spin_lock/unlock of health->wq_lock should have been moved
+to IRQ safe mode.
+However, one occurrence on new code using this lock missed that
+change, resulting in possible deadlock:
+  kernel: Possible unsafe locking scenario:
+  kernel:       CPU0
+  kernel:       ----
+  kernel:  lock(&(&health->wq_lock)->rlock);
+  kernel:  <Interrupt>
+  kernel:    lock(&(&health->wq_lock)->rlock);
+  kernel: #012 *** DEADLOCK ***
+
+Fixes: 2a0165a034ac ("net/mlx5: Cancel delayed recovery work when unloading the driver")
+Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/health.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
+@@ -356,10 +356,11 @@ void mlx5_drain_health_wq(struct mlx5_co
+ void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
+ {
+       struct mlx5_core_health *health = &dev->priv.health;
++      unsigned long flags;
+-      spin_lock(&health->wq_lock);
++      spin_lock_irqsave(&health->wq_lock, flags);
+       set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
+-      spin_unlock(&health->wq_lock);
++      spin_unlock_irqrestore(&health->wq_lock, flags);
+       cancel_delayed_work_sync(&dev->priv.health.recover_work);
+ }
diff --git a/queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch b/queue-4.13/net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch
new file mode 100644 (file)
index 0000000..87afee6
--- /dev/null
@@ -0,0 +1,247 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Or Gerlitz <ogerlitz@mellanox.com>
+Date: Tue, 17 Oct 2017 12:33:43 +0200
+Subject: net/mlx5e: Properly deal with encap flows add/del under neigh update
+
+From: Or Gerlitz <ogerlitz@mellanox.com>
+
+
+[ Upstream commit 3c37745ec614ff048d5dce38f976804b05d307ee ]
+
+Currently, the encap action offload is handled in the actions parse
+function and not in mlx5e_tc_add_fdb_flow() where we deal with all
+the other aspects of offloading actions (vlan, modify header) and
+the rule itself.
+
+When the neigh update code (mlx5e_tc_encap_flows_add()) recreates the
+encap entry and offloads the related flows, we wrongly call again into
+mlx5e_tc_add_fdb_flow(), this for itself would cause us to handle
+again the offloading of vlans and header re-write which puts things
+in non consistent state and step on freed memory (e.g the modify
+header parse buffer which is already freed).
+
+Since on error, mlx5e_tc_add_fdb_flow() detaches and may release the
+encap entry, it causes a corruption at the neigh update code which goes
+over the list of flows associated with this encap entry, or double free
+when the tc flow is later deleted by user-space.
+
+When neigh update (mlx5e_tc_encap_flows_del()) unoffloads the flows related
+to an encap entry which is now invalid, we do a partial repeat of the eswitch
+flow removal code which is wrong too.
+
+To fix things up we do the following:
+
+(1) handle the encap action offload in the eswitch flow add function
+    mlx5e_tc_add_fdb_flow() as done for the other actions and the rule itself.
+
+(2) modify the neigh update code (mlx5e_tc_encap_flows_add/del) to only
+    deal with the encap entry and rules delete/add and not with any of
+    the other offloaded actions.
+
+Fixes: 232c001398ae ('net/mlx5e: Add support to neighbour update flow')
+Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
+Reviewed-by: Paul Blakey <paulb@mellanox.com>
+Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |   89 ++++++++++++++----------
+ 1 file changed, 54 insertions(+), 35 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
++++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+@@ -78,9 +78,11 @@ struct mlx5e_tc_flow {
+ };
+ struct mlx5e_tc_flow_parse_attr {
++      struct ip_tunnel_info tun_info;
+       struct mlx5_flow_spec spec;
+       int num_mod_hdr_actions;
+       void *mod_hdr_actions;
++      int mirred_ifindex;
+ };
+ enum {
+@@ -322,6 +324,12 @@ static void mlx5e_tc_del_nic_flow(struct
+ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+                              struct mlx5e_tc_flow *flow);
++static int mlx5e_attach_encap(struct mlx5e_priv *priv,
++                            struct ip_tunnel_info *tun_info,
++                            struct net_device *mirred_dev,
++                            struct net_device **encap_dev,
++                            struct mlx5e_tc_flow *flow);
++
+ static struct mlx5_flow_handle *
+ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+                     struct mlx5e_tc_flow_parse_attr *parse_attr,
+@@ -329,9 +337,27 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+ {
+       struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+-      struct mlx5_flow_handle *rule;
++      struct net_device *out_dev, *encap_dev = NULL;
++      struct mlx5_flow_handle *rule = NULL;
++      struct mlx5e_rep_priv *rpriv;
++      struct mlx5e_priv *out_priv;
+       int err;
++      if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP) {
++              out_dev = __dev_get_by_index(dev_net(priv->netdev),
++                                           attr->parse_attr->mirred_ifindex);
++              err = mlx5e_attach_encap(priv, &parse_attr->tun_info,
++                                       out_dev, &encap_dev, flow);
++              if (err) {
++                      rule = ERR_PTR(err);
++                      if (err != -EAGAIN)
++                              goto err_attach_encap;
++              }
++              out_priv = netdev_priv(encap_dev);
++              rpriv = out_priv->ppriv;
++              attr->out_rep = rpriv->rep;
++      }
++
+       err = mlx5_eswitch_add_vlan_action(esw, attr);
+       if (err) {
+               rule = ERR_PTR(err);
+@@ -347,10 +373,14 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv
+               }
+       }
+-      rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
+-      if (IS_ERR(rule))
+-              goto err_add_rule;
+-
++      /* we get here if (1) there's no error (rule being null) or when
++       * (2) there's an encap action and we're on -EAGAIN (no valid neigh)
++       */
++      if (rule != ERR_PTR(-EAGAIN)) {
++              rule = mlx5_eswitch_add_offloaded_rule(esw, &parse_attr->spec, attr);
++              if (IS_ERR(rule))
++                      goto err_add_rule;
++      }
+       return rule;
+ err_add_rule:
+@@ -361,6 +391,7 @@ err_mod_hdr:
+ err_add_vlan:
+       if (attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+               mlx5e_detach_encap(priv, flow);
++err_attach_encap:
+       return rule;
+ }
+@@ -389,6 +420,8 @@ static void mlx5e_tc_del_fdb_flow(struct
+ void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
+                             struct mlx5e_encap_entry *e)
+ {
++      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
++      struct mlx5_esw_flow_attr *esw_attr;
+       struct mlx5e_tc_flow *flow;
+       int err;
+@@ -404,10 +437,9 @@ void mlx5e_tc_encap_flows_add(struct mlx
+       mlx5e_rep_queue_neigh_stats_work(priv);
+       list_for_each_entry(flow, &e->flows, encap) {
+-              flow->esw_attr->encap_id = e->encap_id;
+-              flow->rule = mlx5e_tc_add_fdb_flow(priv,
+-                                                 flow->esw_attr->parse_attr,
+-                                                 flow);
++              esw_attr = flow->esw_attr;
++              esw_attr->encap_id = e->encap_id;
++              flow->rule = mlx5_eswitch_add_offloaded_rule(esw, &esw_attr->parse_attr->spec, esw_attr);
+               if (IS_ERR(flow->rule)) {
+                       err = PTR_ERR(flow->rule);
+                       mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
+@@ -421,15 +453,13 @@ void mlx5e_tc_encap_flows_add(struct mlx
+ void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
+                             struct mlx5e_encap_entry *e)
+ {
++      struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+       struct mlx5e_tc_flow *flow;
+-      struct mlx5_fc *counter;
+       list_for_each_entry(flow, &e->flows, encap) {
+               if (flow->flags & MLX5E_TC_FLOW_OFFLOADED) {
+                       flow->flags &= ~MLX5E_TC_FLOW_OFFLOADED;
+-                      counter = mlx5_flow_rule_counter(flow->rule);
+-                      mlx5_del_flow_rules(flow->rule);
+-                      mlx5_fc_destroy(priv->mdev, counter);
++                      mlx5_eswitch_del_offloaded_rule(esw, flow->rule, flow->esw_attr);
+               }
+       }
+@@ -1871,7 +1901,7 @@ static int parse_tc_fdb_actions(struct m
+               if (is_tcf_mirred_egress_redirect(a)) {
+                       int ifindex = tcf_mirred_ifindex(a);
+-                      struct net_device *out_dev, *encap_dev = NULL;
++                      struct net_device *out_dev;
+                       struct mlx5e_priv *out_priv;
+                       out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
+@@ -1884,17 +1914,13 @@ static int parse_tc_fdb_actions(struct m
+                               rpriv = out_priv->ppriv;
+                               attr->out_rep = rpriv->rep;
+                       } else if (encap) {
+-                              err = mlx5e_attach_encap(priv, info,
+-                                                       out_dev, &encap_dev, flow);
+-                              if (err && err != -EAGAIN)
+-                                      return err;
++                              parse_attr->mirred_ifindex = ifindex;
++                              parse_attr->tun_info = *info;
++                              attr->parse_attr = parse_attr;
+                               attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+                                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+                                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
+-                              out_priv = netdev_priv(encap_dev);
+-                              rpriv = out_priv->ppriv;
+-                              attr->out_rep = rpriv->rep;
+-                              attr->parse_attr = parse_attr;
++                              /* attr->out_rep is resolved when we handle encap */
+                       } else {
+                               pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
+                                      priv->netdev->name, out_dev->name);
+@@ -1972,7 +1998,7 @@ int mlx5e_configure_flower(struct mlx5e_
+       if (flow->flags & MLX5E_TC_FLOW_ESWITCH) {
+               err = parse_tc_fdb_actions(priv, f->exts, parse_attr, flow);
+               if (err < 0)
+-                      goto err_handle_encap_flow;
++                      goto err_free;
+               flow->rule = mlx5e_tc_add_fdb_flow(priv, parse_attr, flow);
+       } else {
+               err = parse_tc_nic_actions(priv, f->exts, parse_attr, flow);
+@@ -1983,10 +2009,13 @@ int mlx5e_configure_flower(struct mlx5e_
+       if (IS_ERR(flow->rule)) {
+               err = PTR_ERR(flow->rule);
+-              goto err_free;
++              if (err != -EAGAIN)
++                      goto err_free;
+       }
+-      flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
++      if (err != -EAGAIN)
++              flow->flags |= MLX5E_TC_FLOW_OFFLOADED;
++
+       err = rhashtable_insert_fast(&tc->ht, &flow->node,
+                                    tc->ht_params);
+       if (err)
+@@ -2000,16 +2029,6 @@ int mlx5e_configure_flower(struct mlx5e_
+ err_del_rule:
+       mlx5e_tc_del_flow(priv, flow);
+-err_handle_encap_flow:
+-      if (err == -EAGAIN) {
+-              err = rhashtable_insert_fast(&tc->ht, &flow->node,
+-                                           tc->ht_params);
+-              if (err)
+-                      mlx5e_tc_del_flow(priv, flow);
+-              else
+-                      return 0;
+-      }
+-
+ err_free:
+       kvfree(parse_attr);
+       kfree(flow);
diff --git a/queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch b/queue-4.13/net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch
new file mode 100644 (file)
index 0000000..00b7dbe
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Andrei Vagin <avagin@openvz.org>
+Date: Wed, 25 Oct 2017 10:16:42 -0700
+Subject: net/unix: don't show information about sockets from other namespaces
+
+From: Andrei Vagin <avagin@openvz.org>
+
+
+[ Upstream commit 0f5da659d8f1810f44de14acf2c80cd6499623a0 ]
+
+socket_diag shows information only about sockets from a namespace where
+a diag socket lives.
+
+But if we request information about one unix socket, the kernel don't
+check that its netns is matched with a diag socket namespace, so any
+user can get information about any unix socket in a system. This looks
+like a bug.
+
+v2: add a Fixes tag
+
+Fixes: 51d7cccf0723 ("net: make sock diag per-namespace")
+Signed-off-by: Andrei Vagin <avagin@openvz.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/unix/diag.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/unix/diag.c
++++ b/net/unix/diag.c
+@@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk
+       err = -ENOENT;
+       if (sk == NULL)
+               goto out_nosk;
++      if (!net_eq(sock_net(sk), net))
++              goto out;
+       err = sock_diag_check_cookie(sk, req->udiag_cookie);
+       if (err)
diff --git a/queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch b/queue-4.13/net_sched-avoid-matching-qdisc-with-zero-handle.patch
new file mode 100644 (file)
index 0000000..5194ac5
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 27 Oct 2017 22:08:56 -0700
+Subject: net_sched: avoid matching qdisc with zero handle
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 50317fce2cc70a2bbbc4b42c31bbad510382a53c ]
+
+Davide found the following script triggers a NULL pointer
+dereference:
+
+ip l a name eth0 type dummy
+tc q a dev eth0 parent :1 handle 1: htb
+
+This is because for a freshly created netdevice noop_qdisc
+is attached and when passing 'parent :1', kernel actually
+tries to match the major handle which is 0 and noop_qdisc
+has handle 0 so is matched by mistake. Commit 69012ae425d7
+tries to fix a similar bug but still misses this case.
+
+Handle 0 is not a valid one, should be just skipped. In
+fact, kernel uses it as TC_H_UNSPEC.
+
+Fixes: 69012ae425d7 ("net: sched: fix handling of singleton qdiscs with qdisc_hash")
+Fixes: 59cc1f61f09c ("net: sched:convert qdisc linked list to hashtable")
+Reported-by: Davide Caratti <dcaratti@redhat.com>
+Cc: Jiri Kosina <jkosina@suse.cz>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_api.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/sched/sch_api.c
++++ b/net/sched/sch_api.c
+@@ -307,6 +307,8 @@ struct Qdisc *qdisc_lookup(struct net_de
+ {
+       struct Qdisc *q;
++      if (!handle)
++              return NULL;
+       q = qdisc_match_from_root(dev->qdisc, handle);
+       if (q)
+               goto out;
diff --git a/queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch b/queue-4.13/netlink-do-not-set-cb_running-if-dump-s-start-errs.patch
new file mode 100644 (file)
index 0000000..095bc0e
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Mon, 9 Oct 2017 14:14:51 +0200
+Subject: netlink: do not set cb_running if dump's start() errs
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+
+[ Upstream commit 41c87425a1ac9b633e0fcc78eb1f19640c8fb5a0 ]
+
+It turns out that multiple places can call netlink_dump(), which means
+it's still possible to dereference partially initialized values in
+dump() that were the result of a faulty returned start().
+
+This fixes the issue by calling start() _before_ setting cb_running to
+true, so that there's no chance at all of hitting the dump() function
+through any indirect paths.
+
+It also moves the call to start() to be when the mutex is held. This has
+the nice side effect of serializing invocations to start(), which is
+likely desirable anyway. It also prevents any possible other races that
+might come out of this logic.
+
+In testing this with several different pieces of tricky code to trigger
+these issues, this commit fixes all avenues that I'm aware of.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |   13 +++++++------
+ 1 file changed, 7 insertions(+), 6 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2258,16 +2258,17 @@ int __netlink_dump_start(struct sock *ss
+       cb->min_dump_alloc = control->min_dump_alloc;
+       cb->skb = skb;
++      if (cb->start) {
++              ret = cb->start(cb);
++              if (ret)
++                      goto error_unlock;
++      }
++
+       nlk->cb_running = true;
+       mutex_unlock(nlk->cb_mutex);
+-      ret = 0;
+-      if (cb->start)
+-              ret = cb->start(cb);
+-
+-      if (!ret)
+-              ret = netlink_dump(sk);
++      ret = netlink_dump(sk);
+       sock_put(sk);
diff --git a/queue-4.13/netlink-fix-netlink_ack-extack-race.patch b/queue-4.13/netlink-fix-netlink_ack-extack-race.patch
new file mode 100644 (file)
index 0000000..1a9e597
--- /dev/null
@@ -0,0 +1,67 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Johannes Berg <johannes.berg@intel.com>
+Date: Mon, 16 Oct 2017 17:09:53 +0200
+Subject: netlink: fix netlink_ack() extack race
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+
+[ Upstream commit 48044eb490be71c203e14dd89e8bae87209eab52 ]
+
+It seems that it's possible to toggle NETLINK_F_EXT_ACK
+through setsockopt() while another thread/CPU is building
+a message inside netlink_ack(), which could then trigger
+the WARN_ON()s I added since if it goes from being turned
+off to being turned on between allocating and filling the
+message, the skb could end up being too small.
+
+Avoid this whole situation by storing the value of this
+flag in a separate variable and using that throughout the
+function instead.
+
+Fixes: 2d4bc93368f5 ("netlink: extended ACK reporting")
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2299,6 +2299,7 @@ void netlink_ack(struct sk_buff *in_skb,
+       size_t tlvlen = 0;
+       struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk);
+       unsigned int flags = 0;
++      bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK;
+       /* Error messages get the original request appened, unless the user
+        * requests to cap the error message, and get extra error data if
+@@ -2309,7 +2310,7 @@ void netlink_ack(struct sk_buff *in_skb,
+                       payload += nlmsg_len(nlh);
+               else
+                       flags |= NLM_F_CAPPED;
+-              if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
++              if (nlk_has_extack && extack) {
+                       if (extack->_msg)
+                               tlvlen += nla_total_size(strlen(extack->_msg) + 1);
+                       if (extack->bad_attr)
+@@ -2318,8 +2319,7 @@ void netlink_ack(struct sk_buff *in_skb,
+       } else {
+               flags |= NLM_F_CAPPED;
+-              if (nlk->flags & NETLINK_F_EXT_ACK &&
+-                  extack && extack->cookie_len)
++              if (nlk_has_extack && extack && extack->cookie_len)
+                       tlvlen += nla_total_size(extack->cookie_len);
+       }
+@@ -2347,7 +2347,7 @@ void netlink_ack(struct sk_buff *in_skb,
+       errmsg->error = err;
+       memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh));
+-      if (nlk->flags & NETLINK_F_EXT_ACK && extack) {
++      if (nlk_has_extack && extack) {
+               if (err) {
+                       if (extack->_msg)
+                               WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG,
diff --git a/queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch b/queue-4.13/packet-avoid-panic-in-packet_getsockopt.patch
new file mode 100644 (file)
index 0000000..4df2677
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 18 Oct 2017 16:14:52 -0700
+Subject: packet: avoid panic in packet_getsockopt()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 509c7a1ecc8601f94ffba8a00889fefb239c00c6 ]
+
+syzkaller got crashes in packet_getsockopt() processing
+PACKET_ROLLOVER_STATS command while another thread was managing
+to change po->rollover
+
+Using RCU will fix this bug. We might later add proper RCU annotations
+for sparse sake.
+
+In v2: I replaced kfree(rollover) in fanout_add() to kfree_rcu()
+variant, as spotted by John.
+
+Fixes: a9b6391814d5 ("packet: rollover statistics")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Cc: Willem de Bruijn <willemb@google.com>
+Cc: John Sperbeck <jsperbeck@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   24 ++++++++++++++++--------
+ 1 file changed, 16 insertions(+), 8 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1771,7 +1771,7 @@ static int fanout_add(struct sock *sk, u
+ out:
+       if (err && rollover) {
+-              kfree(rollover);
++              kfree_rcu(rollover, rcu);
+               po->rollover = NULL;
+       }
+       mutex_unlock(&fanout_mutex);
+@@ -1798,8 +1798,10 @@ static struct packet_fanout *fanout_rele
+               else
+                       f = NULL;
+-              if (po->rollover)
++              if (po->rollover) {
+                       kfree_rcu(po->rollover, rcu);
++                      po->rollover = NULL;
++              }
+       }
+       mutex_unlock(&fanout_mutex);
+@@ -3853,6 +3855,7 @@ static int packet_getsockopt(struct sock
+       void *data = &val;
+       union tpacket_stats_u st;
+       struct tpacket_rollover_stats rstats;
++      struct packet_rollover *rollover;
+       if (level != SOL_PACKET)
+               return -ENOPROTOOPT;
+@@ -3931,13 +3934,18 @@ static int packet_getsockopt(struct sock
+                      0);
+               break;
+       case PACKET_ROLLOVER_STATS:
+-              if (!po->rollover)
++              rcu_read_lock();
++              rollover = rcu_dereference(po->rollover);
++              if (rollover) {
++                      rstats.tp_all = atomic_long_read(&rollover->num);
++                      rstats.tp_huge = atomic_long_read(&rollover->num_huge);
++                      rstats.tp_failed = atomic_long_read(&rollover->num_failed);
++                      data = &rstats;
++                      lv = sizeof(rstats);
++              }
++              rcu_read_unlock();
++              if (!rollover)
+                       return -EINVAL;
+-              rstats.tp_all = atomic_long_read(&po->rollover->num);
+-              rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+-              rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+-              data = &rstats;
+-              lv = sizeof(rstats);
+               break;
+       case PACKET_TX_HAS_OFF:
+               val = po->tp_tx_has_off;
diff --git a/queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch b/queue-4.13/ppp-fix-race-in-ppp-device-destruction.patch
new file mode 100644 (file)
index 0000000..57e25aa
--- /dev/null
@@ -0,0 +1,113 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Guillaume Nault <g.nault@alphalink.fr>
+Date: Fri, 6 Oct 2017 17:05:49 +0200
+Subject: ppp: fix race in ppp device destruction
+
+From: Guillaume Nault <g.nault@alphalink.fr>
+
+
+[ Upstream commit 6151b8b37b119e8e3a8401b080d532520c95faf4 ]
+
+ppp_release() tries to ensure that netdevices are unregistered before
+decrementing the unit refcount and running ppp_destroy_interface().
+
+This is all fine as long as the the device is unregistered by
+ppp_release(): the unregister_netdevice() call, followed by
+rtnl_unlock(), guarantee that the unregistration process completes
+before rtnl_unlock() returns.
+
+However, the device may be unregistered by other means (like
+ppp_nl_dellink()). If this happens right before ppp_release() calling
+rtnl_lock(), then ppp_release() has to wait for the concurrent
+unregistration code to release the lock.
+But rtnl_unlock() releases the lock before completing the device
+unregistration process. This allows ppp_release() to proceed and
+eventually call ppp_destroy_interface() before the unregistration
+process completes. Calling free_netdev() on this partially unregistered
+device will BUG():
+
+ ------------[ cut here ]------------
+ kernel BUG at net/core/dev.c:8141!
+ invalid opcode: 0000 [#1] SMP
+
+ CPU: 1 PID: 1557 Comm: pppd Not tainted 4.14.0-rc2+ #4
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1.fc26 04/01/2014
+
+ Call Trace:
+  ppp_destroy_interface+0xd8/0xe0 [ppp_generic]
+  ppp_disconnect_channel+0xda/0x110 [ppp_generic]
+  ppp_unregister_channel+0x5e/0x110 [ppp_generic]
+  pppox_unbind_sock+0x23/0x30 [pppox]
+  pppoe_connect+0x130/0x440 [pppoe]
+  SYSC_connect+0x98/0x110
+  ? do_fcntl+0x2c0/0x5d0
+  SyS_connect+0xe/0x10
+  entry_SYSCALL_64_fastpath+0x1a/0xa5
+
+ RIP: free_netdev+0x107/0x110 RSP: ffffc28a40573d88
+ ---[ end trace ed294ff0cc40eeff ]---
+
+We could set the ->needs_free_netdev flag on PPP devices and move the
+ppp_destroy_interface() logic in the ->priv_destructor() callback. But
+that'd be quite intrusive as we'd first need to unlink from the other
+channels and units that depend on the device (the ones that used the
+PPPIOCCONNECT and PPPIOCATTACH ioctls).
+
+Instead, we can just let the netdevice hold a reference on its
+ppp_file. This reference is dropped in ->priv_destructor(), at the very
+end of the unregistration process, so that neither ppp_release() nor
+ppp_disconnect_channel() can call ppp_destroy_interface() in the interim.
+
+Reported-by: Beniamino Galvani <bgalvani@redhat.com>
+Fixes: 8cb775bc0a34 ("ppp: fix device unregistration upon netns deletion")
+Signed-off-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ppp/ppp_generic.c |   20 ++++++++++++++++++++
+ 1 file changed, 20 insertions(+)
+
+--- a/drivers/net/ppp/ppp_generic.c
++++ b/drivers/net/ppp/ppp_generic.c
+@@ -1339,7 +1339,17 @@ ppp_get_stats64(struct net_device *dev,
+ static int ppp_dev_init(struct net_device *dev)
+ {
++      struct ppp *ppp;
++
+       netdev_lockdep_set_classes(dev);
++
++      ppp = netdev_priv(dev);
++      /* Let the netdevice take a reference on the ppp file. This ensures
++       * that ppp_destroy_interface() won't run before the device gets
++       * unregistered.
++       */
++      atomic_inc(&ppp->file.refcnt);
++
+       return 0;
+ }
+@@ -1362,6 +1372,15 @@ static void ppp_dev_uninit(struct net_de
+       wake_up_interruptible(&ppp->file.rwait);
+ }
++static void ppp_dev_priv_destructor(struct net_device *dev)
++{
++      struct ppp *ppp;
++
++      ppp = netdev_priv(dev);
++      if (atomic_dec_and_test(&ppp->file.refcnt))
++              ppp_destroy_interface(ppp);
++}
++
+ static const struct net_device_ops ppp_netdev_ops = {
+       .ndo_init        = ppp_dev_init,
+       .ndo_uninit      = ppp_dev_uninit,
+@@ -1387,6 +1406,7 @@ static void ppp_setup(struct net_device
+       dev->tx_queue_len = 3;
+       dev->type = ARPHRD_PPP;
+       dev->flags = IFF_POINTOPOINT | IFF_NOARP | IFF_MULTICAST;
++      dev->priv_destructor = ppp_dev_priv_destructor;
+       netif_keep_dst(dev);
+ }
diff --git a/queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch b/queue-4.13/sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
new file mode 100644 (file)
index 0000000..92f0a66
--- /dev/null
@@ -0,0 +1,48 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Wed, 18 Oct 2017 21:37:49 +0800
+Subject: sctp: add the missing sock_owned_by_user check in sctp_icmp_redirect
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 1cc276cec9ec574d41cf47dfc0f51406b6f26ab4 ]
+
+Now sctp processes icmp redirect packet in sctp_icmp_redirect where
+it calls sctp_transport_dst_check in which tp->dst can be released.
+
+The problem is before calling sctp_transport_dst_check, it doesn't
+check sock_owned_by_user, which means tp->dst could be freed while
+a process is accessing it with owning the socket.
+
+An use-after-free issue could be triggered by this.
+
+This patch is to fix it by checking sock_owned_by_user before calling
+sctp_transport_dst_check in sctp_icmp_redirect, so that it would not
+release tp->dst if users still hold sock lock.
+
+Besides, the same issue fixed in commit 45caeaa5ac0b ("dccp/tcp: fix
+routing redirect race") on sctp also needs this check.
+
+Fixes: 55be7a9c6074 ("ipv4: Add redirect support to all protocol icmp error handlers")
+Reported-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Acked-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/input.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/sctp/input.c
++++ b/net/sctp/input.c
+@@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk,
+ {
+       struct dst_entry *dst;
+-      if (!t)
++      if (sock_owned_by_user(sk) || !t)
+               return;
+       dst = sctp_transport_dst_check(t);
+       if (dst)
diff --git a/queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch b/queue-4.13/sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch
new file mode 100644 (file)
index 0000000..9b96de9
--- /dev/null
@@ -0,0 +1,55 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Laszlo Toth <laszlth@gmail.com>
+Date: Mon, 23 Oct 2017 19:19:33 +0200
+Subject: sctp: full support for ipv6 ip_nonlocal_bind & IP_FREEBIND
+
+From: Laszlo Toth <laszlth@gmail.com>
+
+
+[ Upstream commit b71d21c274eff20a9db8158882b545b141b73ab8 ]
+
+Commit 9b9742022888 ("sctp: support ipv6 nonlocal bind")
+introduced support for the above options as v4 sctp did,
+so patched sctp_v6_available().
+
+In the v4 implementation it's enough, because
+sctp_inet_bind_verify() just returns with sctp_v4_available().
+However sctp_inet6_bind_verify() has an extra check before that
+for link-local scope_id, which won't respect the above options.
+
+Added the checks before calling ipv6_chk_addr(), but
+not before the validation of scope_id.
+
+before (w/ both options):
+ ./v6test fe80::10 sctp
+ bind failed, errno: 99 (Cannot assign requested address)
+ ./v6test fe80::10 tcp
+ bind success, errno: 0 (Success)
+
+after (w/ both options):
+ ./v6test fe80::10 sctp
+ bind success, errno: 0 (Success)
+
+Signed-off-by: Laszlo Toth <laszlth@gmail.com>
+Reviewed-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/ipv6.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sctp/ipv6.c
++++ b/net/sctp/ipv6.c
+@@ -882,8 +882,10 @@ static int sctp_inet6_bind_verify(struct
+                       net = sock_net(&opt->inet.sk);
+                       rcu_read_lock();
+                       dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id);
+-                      if (!dev ||
+-                          !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) {
++                      if (!dev || !(opt->inet.freebind ||
++                                    net->ipv6.sysctl.ip_nonlocal_bind ||
++                                    ipv6_chk_addr(net, &addr->v6.sin6_addr,
++                                                  dev, 0))) {
+                               rcu_read_unlock();
+                               return 0;
+                       }
diff --git a/queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch b/queue-4.13/sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch
new file mode 100644 (file)
index 0000000..e9c31c9
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Sat, 28 Oct 2017 02:13:29 +0800
+Subject: sctp: reset owner sk for data chunks on out queues when migrating a sock
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit d04adf1b355181e737b6b1e23d801b07f0b7c4c0 ]
+
+Now when migrating sock to another one in sctp_sock_migrate(), it only
+resets owner sk for the data in receive queues, not the chunks on out
+queues.
+
+It would cause that data chunks length on the sock is not consistent
+with sk sk_wmem_alloc. When closing the sock or freeing these chunks,
+the old sk would never be freed, and the new sock may crash due to
+the overflow sk_wmem_alloc.
+
+syzbot found this issue with this series:
+
+  r0 = socket$inet_sctp()
+  sendto$inet(r0)
+  listen(r0)
+  accept4(r0)
+  close(r0)
+
+Although listen() should have returned error when one TCP-style socket
+is in connecting (I may fix this one in another patch), it could also
+be reproduced by peeling off an assoc.
+
+This issue is there since very beginning.
+
+This patch is to reset owner sk for the chunks on out queues so that
+sk sk_wmem_alloc has correct value after accept one sock or peeloff
+an assoc to one sock.
+
+Note that when resetting owner sk for chunks on outqueue, it has to
+sctp_clear_owner_w/skb_orphan chunks before changing assoc->base.sk
+first and then sctp_set_owner_w them after changing assoc->base.sk,
+due to that sctp_wfree and it's callees are using assoc->base.sk.
+
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sctp/socket.c |   32 ++++++++++++++++++++++++++++++++
+ 1 file changed, 32 insertions(+)
+
+--- a/net/sctp/socket.c
++++ b/net/sctp/socket.c
+@@ -169,6 +169,36 @@ static inline void sctp_set_owner_w(stru
+       sk_mem_charge(sk, chunk->skb->truesize);
+ }
++static void sctp_clear_owner_w(struct sctp_chunk *chunk)
++{
++      skb_orphan(chunk->skb);
++}
++
++static void sctp_for_each_tx_datachunk(struct sctp_association *asoc,
++                                     void (*cb)(struct sctp_chunk *))
++
++{
++      struct sctp_outq *q = &asoc->outqueue;
++      struct sctp_transport *t;
++      struct sctp_chunk *chunk;
++
++      list_for_each_entry(t, &asoc->peer.transport_addr_list, transports)
++              list_for_each_entry(chunk, &t->transmitted, transmitted_list)
++                      cb(chunk);
++
++      list_for_each_entry(chunk, &q->retransmit, list)
++              cb(chunk);
++
++      list_for_each_entry(chunk, &q->sacked, list)
++              cb(chunk);
++
++      list_for_each_entry(chunk, &q->abandoned, list)
++              cb(chunk);
++
++      list_for_each_entry(chunk, &q->out_chunk_list, list)
++              cb(chunk);
++}
++
+ /* Verify that this is a valid address. */
+ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr,
+                                  int len)
+@@ -8196,7 +8226,9 @@ static void sctp_sock_migrate(struct soc
+        * paths won't try to lock it and then oldsk.
+        */
+       lock_sock_nested(newsk, SINGLE_DEPTH_NESTING);
++      sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w);
+       sctp_assoc_migrate(assoc, newsk);
++      sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w);
+       /* If the association on the newsk is already closed before accept()
+        * is called, set RCV_SHUTDOWN flag.
diff --git a/queue-4.13/series b/queue-4.13/series
new file mode 100644 (file)
index 0000000..5634667
--- /dev/null
@@ -0,0 +1,37 @@
+ppp-fix-race-in-ppp-device-destruction.patch
+gso-fix-payload-length-when-gso_size-is-zero.patch
+ipv4-fix-traffic-triggered-ipsec-connections.patch
+ipv6-fix-traffic-triggered-ipsec-connections.patch
+netlink-do-not-set-cb_running-if-dump-s-start-errs.patch
+net-call-cgroup_sk_alloc-earlier-in-sk_clone_lock.patch
+macsec-fix-memory-leaks-when-skb_to_sgvec-fails.patch
+l2tp-check-ps-sock-before-running-pppol2tp_session_ioctl.patch
+tun-call-dev_get_valid_name-before-register_netdevice.patch
+netlink-fix-netlink_ack-extack-race.patch
+sctp-add-the-missing-sock_owned_by_user-check-in-sctp_icmp_redirect.patch
+tcp-dccp-fix-ireq-opt-races.patch
+packet-avoid-panic-in-packet_getsockopt.patch
+geneve-fix-function-matching-vni-and-tunnel-id-on-big-endian.patch
+net-bridge-fix-returning-of-vlan-range-op-errors.patch
+soreuseport-fix-initialization-race.patch
+ipv6-flowlabel-do-not-leave-opt-tot_len-with-garbage.patch
+sctp-full-support-for-ipv6-ip_nonlocal_bind-ip_freebind.patch
+tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
+tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
+net-dsa-check-master-device-before-put.patch
+net-unix-don-t-show-information-about-sockets-from-other-namespaces.patch
+tap-double-free-in-error-path-in-tap_open.patch
+net-mlx5-fix-health-work-queue-spin-lock-to-irq-safe.patch
+net-mlx5e-properly-deal-with-encap-flows-add-del-under-neigh-update.patch
+ipip-only-increase-err_count-for-some-certain-type-icmp-in-ipip_err.patch
+ip6_gre-only-increase-err_count-for-some-certain-type-icmpv6-in-ip6gre_err.patch
+ip6_gre-update-dst-pmtu-if-dev-mtu-has-been-updated-by-toobig-in-__gre6_xmit.patch
+tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
+tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch
+tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
+sctp-reset-owner-sk-for-data-chunks-on-out-queues-when-migrating-a-sock.patch
+net_sched-avoid-matching-qdisc-with-zero-handle.patch
+l2tp-hold-tunnel-in-pppol2tp_connect.patch
+tun-tap-sanitize-tunsetsndbuf-input.patch
+ipv6-addrconf-increment-ifp-refcount-before-ipv6_del_addr.patch
+tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
diff --git a/queue-4.13/soreuseport-fix-initialization-race.patch b/queue-4.13/soreuseport-fix-initialization-race.patch
new file mode 100644 (file)
index 0000000..bd19887
--- /dev/null
@@ -0,0 +1,91 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Craig Gallek <kraig@google.com>
+Date: Thu, 19 Oct 2017 15:00:29 -0400
+Subject: soreuseport: fix initialization race
+
+From: Craig Gallek <kraig@google.com>
+
+
+[ Upstream commit 1b5f962e71bfad6284574655c406597535c3ea7a ]
+
+Syzkaller stumbled upon a way to trigger
+WARNING: CPU: 1 PID: 13881 at net/core/sock_reuseport.c:41
+reuseport_alloc+0x306/0x3b0 net/core/sock_reuseport.c:39
+
+There are two initialization paths for the sock_reuseport structure in a
+socket: Through the udp/tcp bind paths of SO_REUSEPORT sockets or through
+SO_ATTACH_REUSEPORT_[CE]BPF before bind.  The existing implementation
+assumedthat the socket lock protected both of these paths when it actually
+only protects the SO_ATTACH_REUSEPORT path.  Syzkaller triggered this
+double allocation by running these paths concurrently.
+
+This patch moves the check for double allocation into the reuseport_alloc
+function which is protected by a global spin lock.
+
+Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection")
+Fixes: c125e80b8868 ("soreuseport: fast reuseport TCP socket selection")
+Signed-off-by: Craig Gallek <kraig@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock_reuseport.c  |   12 +++++++++---
+ net/ipv4/inet_hashtables.c |    5 +----
+ net/ipv4/udp.c             |    5 +----
+ 3 files changed, 11 insertions(+), 11 deletions(-)
+
+--- a/net/core/sock_reuseport.c
++++ b/net/core/sock_reuseport.c
+@@ -36,9 +36,14 @@ int reuseport_alloc(struct sock *sk)
+        * soft irq of receive path or setsockopt from process context
+        */
+       spin_lock_bh(&reuseport_lock);
+-      WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb,
+-                                          lockdep_is_held(&reuseport_lock)),
+-                "multiple allocations for the same socket");
++
++      /* Allocation attempts can occur concurrently via the setsockopt path
++       * and the bind/hash path.  Nothing to do when we lose the race.
++       */
++      if (rcu_dereference_protected(sk->sk_reuseport_cb,
++                                    lockdep_is_held(&reuseport_lock)))
++              goto out;
++
+       reuse = __reuseport_alloc(INIT_SOCKS);
+       if (!reuse) {
+               spin_unlock_bh(&reuseport_lock);
+@@ -49,6 +54,7 @@ int reuseport_alloc(struct sock *sk)
+       reuse->num_socks = 1;
+       rcu_assign_pointer(sk->sk_reuseport_cb, reuse);
++out:
+       spin_unlock_bh(&reuseport_lock);
+       return 0;
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -449,10 +449,7 @@ static int inet_reuseport_add_sock(struc
+                       return reuseport_add_sock(sk, sk2);
+       }
+-      /* Initial allocation may have already happened via setsockopt */
+-      if (!rcu_access_pointer(sk->sk_reuseport_cb))
+-              return reuseport_alloc(sk);
+-      return 0;
++      return reuseport_alloc(sk);
+ }
+ int __inet_hash(struct sock *sk, struct sock *osk)
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct
+               }
+       }
+-      /* Initial allocation may have already happened via setsockopt */
+-      if (!rcu_access_pointer(sk->sk_reuseport_cb))
+-              return reuseport_alloc(sk);
+-      return 0;
++      return reuseport_alloc(sk);
+ }
+ /**
diff --git a/queue-4.13/tap-double-free-in-error-path-in-tap_open.patch b/queue-4.13/tap-double-free-in-error-path-in-tap_open.patch
new file mode 100644 (file)
index 0000000..9c65862
--- /dev/null
@@ -0,0 +1,66 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+Date: Wed, 25 Oct 2017 00:23:04 -0700
+Subject: tap: double-free in error path in tap_open()
+
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+
+
+[ Upstream commit 78e0ea6791d7baafb8a0ca82b1bd0c7b3453c919 ]
+
+Double free of skb_array in tap module is causing kernel panic. When
+tap_set_queue() fails we free skb_array right away by calling
+skb_array_cleanup(). However, later on skb_array_cleanup() is called
+again by tap_sock_destruct through sock_put(). This patch fixes that
+issue.
+
+Fixes: 362899b8725b35e3 (macvtap: switch to use skb array)
+Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
+Acked-by: Jason Wang <jasowang@redhat.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c |   18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -517,6 +517,10 @@ static int tap_open(struct inode *inode,
+                                            &tap_proto, 0);
+       if (!q)
+               goto err;
++      if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL)) {
++              sk_free(&q->sk);
++              goto err;
++      }
+       RCU_INIT_POINTER(q->sock.wq, &q->wq);
+       init_waitqueue_head(&q->wq.wait);
+@@ -540,22 +544,18 @@ static int tap_open(struct inode *inode,
+       if ((tap->dev->features & NETIF_F_HIGHDMA) && (tap->dev->features & NETIF_F_SG))
+               sock_set_flag(&q->sk, SOCK_ZEROCOPY);
+-      err = -ENOMEM;
+-      if (skb_array_init(&q->skb_array, tap->dev->tx_queue_len, GFP_KERNEL))
+-              goto err_array;
+-
+       err = tap_set_queue(tap, file, q);
+-      if (err)
+-              goto err_queue;
++      if (err) {
++              /* tap_sock_destruct() will take care of freeing skb_array */
++              goto err_put;
++      }
+       dev_put(tap->dev);
+       rtnl_unlock();
+       return err;
+-err_queue:
+-      skb_array_cleanup(&q->skb_array);
+-err_array:
++err_put:
+       sock_put(&q->sk);
+ err:
+       if (tap)
diff --git a/queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch b/queue-4.13/tap-reference-to-kva-of-an-unloaded-module-causes-kernel-panic.patch
new file mode 100644 (file)
index 0000000..e341dd2
--- /dev/null
@@ -0,0 +1,120 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+Date: Fri, 27 Oct 2017 00:00:16 -0700
+Subject: tap: reference to KVA of an unloaded module causes kernel panic
+
+From: Girish Moodalbail <girish.moodalbail@oracle.com>
+
+
+[ Upstream commit dea6e19f4ef746aa18b4c33d1a7fed54356796ed ]
+
+The commit 9a393b5d5988 ("tap: tap as an independent module") created a
+separate tap module that implements tap functionality and exports
+interfaces that will be used by macvtap and ipvtap modules to create
+respective tap devices.
+
+However, that patch introduced a regression wherein the modules macvtap
+and ipvtap can be removed (through modprobe -r) while there are
+applications using the respective /dev/tapX devices. These applications
+cause kernel to hold reference to /dev/tapX through 'struct cdev
+macvtap_cdev' and 'struct cdev ipvtap_dev' defined in macvtap and ipvtap
+modules respectively. So,  when the application is later closed the
+kernel panics because we are referencing KVA that is present in the
+unloaded modules.
+
+----------8<------- Example ----------8<----------
+$ sudo ip li add name mv0 link enp7s0 type macvtap
+$ sudo ip li show mv0 |grep mv0| awk -e '{print $1 $2}'
+  14:mv0@enp7s0:
+$ cat /dev/tap14 &
+$ lsmod |egrep -i 'tap|vlan'
+macvtap                16384  0
+macvlan                24576  1 macvtap
+tap                    24576  3 macvtap
+$ sudo modprobe -r macvtap
+$ fg
+cat /dev/tap14
+^C
+
+<...system panics...>
+BUG: unable to handle kernel paging request at ffffffffa038c500
+IP: cdev_put+0xf/0x30
+----------8<-----------------8<----------
+
+The fix is to set cdev.owner to the module that creates the tap device
+(either macvtap or ipvtap). With this set, the operations (in
+fs/char_dev.c) on char device holds and releases the module through
+cdev_get() and cdev_put() and will not allow the module to unload
+prematurely.
+
+Fixes: 9a393b5d5988ea4e (tap: tap as an independent module)
+Signed-off-by: Girish Moodalbail <girish.moodalbail@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ipvlan/ipvtap.c |    4 ++--
+ drivers/net/macvtap.c       |    4 ++--
+ drivers/net/tap.c           |    5 +++--
+ include/linux/if_tap.h      |    4 ++--
+ 4 files changed, 9 insertions(+), 8 deletions(-)
+
+--- a/drivers/net/ipvlan/ipvtap.c
++++ b/drivers/net/ipvlan/ipvtap.c
+@@ -197,8 +197,8 @@ static int ipvtap_init(void)
+ {
+       int err;
+-      err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap");
+-
++      err = tap_create_cdev(&ipvtap_cdev, &ipvtap_major, "ipvtap",
++                            THIS_MODULE);
+       if (err)
+               goto out1;
+--- a/drivers/net/macvtap.c
++++ b/drivers/net/macvtap.c
+@@ -204,8 +204,8 @@ static int macvtap_init(void)
+ {
+       int err;
+-      err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap");
+-
++      err = tap_create_cdev(&macvtap_cdev, &macvtap_major, "macvtap",
++                            THIS_MODULE);
+       if (err)
+               goto out1;
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -1252,8 +1252,8 @@ static int tap_list_add(dev_t major, con
+       return 0;
+ }
+-int tap_create_cdev(struct cdev *tap_cdev,
+-                  dev_t *tap_major, const char *device_name)
++int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major,
++                  const char *device_name, struct module *module)
+ {
+       int err;
+@@ -1262,6 +1262,7 @@ int tap_create_cdev(struct cdev *tap_cde
+               goto out1;
+       cdev_init(tap_cdev, &tap_fops);
++      tap_cdev->owner = module;
+       err = cdev_add(tap_cdev, *tap_major, TAP_NUM_DEVS);
+       if (err)
+               goto out2;
+--- a/include/linux/if_tap.h
++++ b/include/linux/if_tap.h
+@@ -73,8 +73,8 @@ void tap_del_queues(struct tap_dev *tap)
+ int tap_get_minor(dev_t major, struct tap_dev *tap);
+ void tap_free_minor(dev_t major, struct tap_dev *tap);
+ int tap_queue_resize(struct tap_dev *tap);
+-int tap_create_cdev(struct cdev *tap_cdev,
+-                  dev_t *tap_major, const char *device_name);
++int tap_create_cdev(struct cdev *tap_cdev, dev_t *tap_major,
++                  const char *device_name, struct module *module);
+ void tap_destroy_cdev(dev_t major, struct cdev *tap_cdev);
+ #endif /*_LINUX_IF_TAP_H_*/
diff --git a/queue-4.13/tcp-dccp-fix-ireq-opt-races.patch b/queue-4.13/tcp-dccp-fix-ireq-opt-races.patch
new file mode 100644 (file)
index 0000000..4c3c09b
--- /dev/null
@@ -0,0 +1,408 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 20 Oct 2017 09:04:13 -0700
+Subject: tcp/dccp: fix ireq->opt races
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit c92e8c02fe664155ac4234516e32544bec0f113d ]
+
+syzkaller found another bug in DCCP/TCP stacks [1]
+
+For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix
+ireq->pktopts race"), we need to make sure we do not access
+ireq->opt unless we own the request sock.
+
+Note the opt field is renamed to ireq_opt to ease grep games.
+
+[1]
+BUG: KASAN: use-after-free in ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+Read of size 1 at addr ffff8801c951039c by task syz-executor5/3295
+
+CPU: 1 PID: 3295 Comm: syz-executor5 Not tainted 4.14.0-rc4+ #80
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+Call Trace:
+ __dump_stack lib/dump_stack.c:16 [inline]
+ dump_stack+0x194/0x257 lib/dump_stack.c:52
+ print_address_description+0x73/0x250 mm/kasan/report.c:252
+ kasan_report_error mm/kasan/report.c:351 [inline]
+ kasan_report+0x25b/0x340 mm/kasan/report.c:409
+ __asan_report_load1_noabort+0x14/0x20 mm/kasan/report.c:427
+ ip_queue_xmit+0x1687/0x18e0 net/ipv4/ip_output.c:474
+ tcp_transmit_skb+0x1ab7/0x3840 net/ipv4/tcp_output.c:1135
+ tcp_send_ack.part.37+0x3bb/0x650 net/ipv4/tcp_output.c:3587
+ tcp_send_ack+0x49/0x60 net/ipv4/tcp_output.c:3557
+ __tcp_ack_snd_check+0x2c6/0x4b0 net/ipv4/tcp_input.c:5072
+ tcp_ack_snd_check net/ipv4/tcp_input.c:5085 [inline]
+ tcp_rcv_state_process+0x2eff/0x4850 net/ipv4/tcp_input.c:6071
+ tcp_child_process+0x342/0x990 net/ipv4/tcp_minisocks.c:816
+ tcp_v4_rcv+0x1827/0x2f80 net/ipv4/tcp_ipv4.c:1682
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+RIP: 0033:0x40c341
+RSP: 002b:00007f469523ec10 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000718000 RCX: 000000000040c341
+RDX: 0000000000000037 RSI: 0000000020004000 RDI: 0000000000000015
+RBP: 0000000000000086 R08: 0000000000000000 R09: 0000000000000000
+R10: 00000000000f4240 R11: 0000000000000293 R12: 00000000004b7fd1
+R13: 00000000ffffffff R14: 0000000020000000 R15: 0000000000025000
+
+Allocated by task 3295:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_kmalloc+0xad/0xe0 mm/kasan/kasan.c:551
+ __do_kmalloc mm/slab.c:3725 [inline]
+ __kmalloc+0x162/0x760 mm/slab.c:3734
+ kmalloc include/linux/slab.h:498 [inline]
+ tcp_v4_save_options include/net/tcp.h:1962 [inline]
+ tcp_v4_init_req+0x2d3/0x3e0 net/ipv4/tcp_ipv4.c:1271
+ tcp_conn_request+0xf6d/0x3410 net/ipv4/tcp_input.c:6283
+ tcp_v4_conn_request+0x157/0x210 net/ipv4/tcp_ipv4.c:1313
+ tcp_rcv_state_process+0x8ea/0x4850 net/ipv4/tcp_input.c:5857
+ tcp_v4_do_rcv+0x55c/0x7d0 net/ipv4/tcp_ipv4.c:1482
+ tcp_v4_rcv+0x2d10/0x2f80 net/ipv4/tcp_ipv4.c:1711
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Freed by task 3306:
+ save_stack_trace+0x16/0x20 arch/x86/kernel/stacktrace.c:59
+ save_stack+0x43/0xd0 mm/kasan/kasan.c:447
+ set_track mm/kasan/kasan.c:459 [inline]
+ kasan_slab_free+0x71/0xc0 mm/kasan/kasan.c:524
+ __cache_free mm/slab.c:3503 [inline]
+ kfree+0xca/0x250 mm/slab.c:3820
+ inet_sock_destruct+0x59d/0x950 net/ipv4/af_inet.c:157
+ __sk_destruct+0xfd/0x910 net/core/sock.c:1560
+ sk_destruct+0x47/0x80 net/core/sock.c:1595
+ __sk_free+0x57/0x230 net/core/sock.c:1603
+ sk_free+0x2a/0x40 net/core/sock.c:1614
+ sock_put include/net/sock.h:1652 [inline]
+ inet_csk_complete_hashdance+0xd5/0xf0 net/ipv4/inet_connection_sock.c:959
+ tcp_check_req+0xf4d/0x1620 net/ipv4/tcp_minisocks.c:765
+ tcp_v4_rcv+0x17f6/0x2f80 net/ipv4/tcp_ipv4.c:1675
+ ip_local_deliver_finish+0x2e2/0xba0 net/ipv4/ip_input.c:216
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_local_deliver+0x1ce/0x6e0 net/ipv4/ip_input.c:257
+ dst_input include/net/dst.h:464 [inline]
+ ip_rcv_finish+0x887/0x19a0 net/ipv4/ip_input.c:397
+ NF_HOOK include/linux/netfilter.h:249 [inline]
+ ip_rcv+0xc3f/0x1820 net/ipv4/ip_input.c:493
+ __netif_receive_skb_core+0x1a3e/0x34b0 net/core/dev.c:4476
+ __netif_receive_skb+0x2c/0x1b0 net/core/dev.c:4514
+ netif_receive_skb_internal+0x10b/0x670 net/core/dev.c:4587
+ netif_receive_skb+0xae/0x390 net/core/dev.c:4611
+ tun_rx_batched.isra.50+0x5ed/0x860 drivers/net/tun.c:1372
+ tun_get_user+0x249c/0x36d0 drivers/net/tun.c:1766
+ tun_chr_write_iter+0xbf/0x160 drivers/net/tun.c:1792
+ call_write_iter include/linux/fs.h:1770 [inline]
+ new_sync_write fs/read_write.c:468 [inline]
+ __vfs_write+0x68a/0x970 fs/read_write.c:481
+ vfs_write+0x18f/0x510 fs/read_write.c:543
+ SYSC_write fs/read_write.c:588 [inline]
+ SyS_write+0xef/0x220 fs/read_write.c:580
+ entry_SYSCALL_64_fastpath+0x1f/0xbe
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h         |    2 +-
+ net/dccp/ipv4.c                 |   13 ++++++++-----
+ net/ipv4/cipso_ipv4.c           |   24 +++++++-----------------
+ net/ipv4/inet_connection_sock.c |    8 +++-----
+ net/ipv4/syncookies.c           |    2 +-
+ net/ipv4/tcp_input.c            |    2 +-
+ net/ipv4/tcp_ipv4.c             |   21 ++++++++++++---------
+ 7 files changed, 33 insertions(+), 39 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -96,7 +96,7 @@ struct inet_request_sock {
+       kmemcheck_bitfield_end(flags);
+       u32                     ir_mark;
+       union {
+-              struct ip_options_rcu   *opt;
++              struct ip_options_rcu __rcu     *ireq_opt;
+ #if IS_ENABLED(CONFIG_IPV6)
+               struct {
+                       struct ipv6_txoptions   *ipv6_opt;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(c
+       sk_daddr_set(newsk, ireq->ir_rmt_addr);
+       sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+       newinet->inet_saddr     = ireq->ir_loc_addr;
+-      newinet->inet_opt       = ireq->opt;
+-      ireq->opt          = NULL;
++      RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt));
+       newinet->mc_index  = inet_iif(skb);
+       newinet->mc_ttl    = ip_hdr(skb)->ttl;
+       newinet->inet_id   = jiffies;
+@@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(c
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-
++      if (*own_req)
++              ireq->ireq_opt = NULL;
++      else
++              newinet->inet_opt = NULL;
+       return newsk;
+ exit_overflow:
+@@ -441,6 +443,7 @@ exit:
+       __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS);
+       return NULL;
+ put_and_exit:
++      newinet->inet_opt = NULL;
+       inet_csk_prepare_forced_close(newsk);
+       dccp_done(newsk);
+       goto exit;
+@@ -492,7 +495,7 @@ static int dccp_v4_send_response(const s
+                                                             ireq->ir_rmt_addr);
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq->opt);
++                                          rcu_dereference(ireq->ireq_opt));
+               err = net_xmit_eval(err);
+       }
+@@ -548,7 +551,7 @@ out:
+ static void dccp_v4_reqsk_destructor(struct request_sock *req)
+ {
+       dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg);
+-      kfree(inet_rsk(req)->opt);
++      kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+ void dccp_syn_ack_timeout(const struct request_sock *req)
+--- a/net/ipv4/cipso_ipv4.c
++++ b/net/ipv4/cipso_ipv4.c
+@@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_
+       buf = NULL;
+       req_inet = inet_rsk(req);
+-      opt = xchg(&req_inet->opt, opt);
++      opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt);
+       if (opt)
+               kfree_rcu(opt, rcu);
+@@ -1973,11 +1973,13 @@ req_setattr_failure:
+  * values on failure.
+  *
+  */
+-static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr)
++static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr)
+ {
++      struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1);
+       int hdr_delta = 0;
+-      struct ip_options_rcu *opt = *opt_ptr;
++      if (!opt || opt->opt.cipso == 0)
++              return 0;
+       if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) {
+               u8 cipso_len;
+               u8 cipso_off;
+@@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_opt
+  */
+ void cipso_v4_sock_delattr(struct sock *sk)
+ {
+-      int hdr_delta;
+-      struct ip_options_rcu *opt;
+       struct inet_sock *sk_inet;
++      int hdr_delta;
+       sk_inet = inet_sk(sk);
+-      opt = rcu_dereference_protected(sk_inet->inet_opt, 1);
+-      if (!opt || opt->opt.cipso == 0)
+-              return;
+       hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt);
+       if (sk_inet->is_icsk && hdr_delta > 0) {
+@@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *
+  */
+ void cipso_v4_req_delattr(struct request_sock *req)
+ {
+-      struct ip_options_rcu *opt;
+-      struct inet_request_sock *req_inet;
+-
+-      req_inet = inet_rsk(req);
+-      opt = req_inet->opt;
+-      if (!opt || opt->opt.cipso == 0)
+-              return;
+-
+-      cipso_v4_delopt(&req_inet->opt);
++      cipso_v4_delopt(&inet_rsk(req)->ireq_opt);
+ }
+ /**
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -537,9 +537,10 @@ struct dst_entry *inet_csk_route_req(con
+ {
+       const struct inet_request_sock *ireq = inet_rsk(req);
+       struct net *net = read_pnet(&ireq->ireq_net);
+-      struct ip_options_rcu *opt = ireq->opt;
++      struct ip_options_rcu *opt;
+       struct rtable *rt;
++      opt = rcu_dereference(ireq->ireq_opt);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -573,10 +574,9 @@ struct dst_entry *inet_csk_route_child_s
+       struct flowi4 *fl4;
+       struct rtable *rt;
++      opt = rcu_dereference(ireq->ireq_opt);
+       fl4 = &newinet->cork.fl.u.ip4;
+-      rcu_read_lock();
+-      opt = rcu_dereference(newinet->inet_opt);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+@@ -589,13 +589,11 @@ struct dst_entry *inet_csk_route_child_s
+               goto no_route;
+       if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway)
+               goto route_err;
+-      rcu_read_unlock();
+       return &rt->dst;
+ route_err:
+       ip_rt_put(rt);
+ no_route:
+-      rcu_read_unlock();
+       __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
+       return NULL;
+ }
+--- a/net/ipv4/syncookies.c
++++ b/net/ipv4/syncookies.c
+@@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock
+       /* We throwed the options of the initial SYN away, so we hope
+        * the ACK carries the same options again (see RFC1122 4.2.3.8)
+        */
+-      ireq->opt = tcp_v4_save_options(skb);
++      RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+       if (security_inet_conn_request(sk, skb, req)) {
+               reqsk_free(req);
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -6235,7 +6235,7 @@ struct request_sock *inet_reqsk_alloc(co
+               struct inet_request_sock *ireq = inet_rsk(req);
+               kmemcheck_annotate_bitfield(ireq, flags);
+-              ireq->opt = NULL;
++              ireq->ireq_opt = NULL;
+ #if IS_ENABLED(CONFIG_IPV6)
+               ireq->pktopts = NULL;
+ #endif
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -878,7 +878,7 @@ static int tcp_v4_send_synack(const stru
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          ireq->opt);
++                                          rcu_dereference(ireq->ireq_opt));
+               err = net_xmit_eval(err);
+       }
+@@ -890,7 +890,7 @@ static int tcp_v4_send_synack(const stru
+  */
+ static void tcp_v4_reqsk_destructor(struct request_sock *req)
+ {
+-      kfree(inet_rsk(req)->opt);
++      kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1));
+ }
+ #ifdef CONFIG_TCP_MD5SIG
+@@ -1269,7 +1269,7 @@ static void tcp_v4_init_req(struct reque
+       sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr);
+       sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr);
+-      ireq->opt = tcp_v4_save_options(skb);
++      RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(skb));
+ }
+ static struct dst_entry *tcp_v4_route_req(const struct sock *sk,
+@@ -1356,10 +1356,9 @@ struct sock *tcp_v4_syn_recv_sock(const
+       sk_daddr_set(newsk, ireq->ir_rmt_addr);
+       sk_rcv_saddr_set(newsk, ireq->ir_loc_addr);
+       newsk->sk_bound_dev_if = ireq->ir_iif;
+-      newinet->inet_saddr           = ireq->ir_loc_addr;
+-      inet_opt              = ireq->opt;
+-      rcu_assign_pointer(newinet->inet_opt, inet_opt);
+-      ireq->opt             = NULL;
++      newinet->inet_saddr   = ireq->ir_loc_addr;
++      inet_opt              = rcu_dereference(ireq->ireq_opt);
++      RCU_INIT_POINTER(newinet->inet_opt, inet_opt);
+       newinet->mc_index     = inet_iif(skb);
+       newinet->mc_ttl       = ip_hdr(skb)->ttl;
+       newinet->rcv_tos      = ip_hdr(skb)->tos;
+@@ -1404,9 +1403,12 @@ struct sock *tcp_v4_syn_recv_sock(const
+       if (__inet_inherit_port(sk, newsk) < 0)
+               goto put_and_exit;
+       *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
+-      if (*own_req)
++      if (likely(*own_req)) {
+               tcp_move_syn(newtp, req);
+-
++              ireq->ireq_opt = NULL;
++      } else {
++              newinet->inet_opt = NULL;
++      }
+       return newsk;
+ exit_overflow:
+@@ -1417,6 +1419,7 @@ exit:
+       tcp_listendrop(sk);
+       return NULL;
+ put_and_exit:
++      newinet->inet_opt = NULL;
+       inet_csk_prepare_forced_close(newsk);
+       tcp_done(newsk);
+       goto exit;
diff --git a/queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch b/queue-4.13/tcp-dccp-fix-lockdep-splat-in-inet_csk_route_req.patch
new file mode 100644 (file)
index 0000000..91453c1
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Sun, 22 Oct 2017 12:33:57 -0700
+Subject: tcp/dccp: fix lockdep splat in inet_csk_route_req()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit a6ca7abe53633d08eea1c6756cb49c9b2d4c90bf ]
+
+This patch fixes the following lockdep splat in inet_csk_route_req()
+
+  lockdep_rcu_suspicious
+  inet_csk_route_req
+  tcp_v4_send_synack
+  tcp_rtx_synack
+  inet_rtx_syn_ack
+  tcp_fastopen_synack_time
+  tcp_retransmit_timer
+  tcp_write_timer_handler
+  tcp_write_timer
+  call_timer_fn
+
+Thread running inet_csk_route_req() owns a reference on the request
+socket, so we have the guarantee ireq->ireq_opt wont be changed or
+freed.
+
+lockdep can enforce this invariant for us.
+
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_connection_sock.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -540,7 +540,8 @@ struct dst_entry *inet_csk_route_req(con
+       struct ip_options_rcu *opt;
+       struct rtable *rt;
+-      opt = rcu_dereference(ireq->ireq_opt);
++      opt = rcu_dereference_protected(ireq->ireq_opt,
++                                      refcount_read(&req->rsk_refcnt) > 0);
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
diff --git a/queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch b/queue-4.13/tcp-dccp-fix-other-lockdep-splats-accessing-ireq_opt.patch
new file mode 100644 (file)
index 0000000..5f41a7e
--- /dev/null
@@ -0,0 +1,113 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 24 Oct 2017 08:20:31 -0700
+Subject: tcp/dccp: fix other lockdep splats accessing ireq_opt
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 06f877d613be3621604c2520ec0351d9fbdca15f ]
+
+In my first attempt to fix the lockdep splat, I forgot we could
+enter inet_csk_route_req() with a freshly allocated request socket,
+for which refcount has not yet been elevated, due to complex
+SLAB_TYPESAFE_BY_RCU rules.
+
+We either are in rcu_read_lock() section _or_ we own a refcount on the
+request.
+
+Correct RCU verb to use here is rcu_dereference_check(), although it is
+not possible to prove we actually own a reference on a shared
+refcount :/
+
+In v2, I added ireq_opt_deref() helper and use in three places, to fix other
+possible splats.
+
+[   49.844590]  lockdep_rcu_suspicious+0xea/0xf3
+[   49.846487]  inet_csk_route_req+0x53/0x14d
+[   49.848334]  tcp_v4_route_req+0xe/0x10
+[   49.850174]  tcp_conn_request+0x31c/0x6a0
+[   49.851992]  ? __lock_acquire+0x614/0x822
+[   49.854015]  tcp_v4_conn_request+0x5a/0x79
+[   49.855957]  ? tcp_v4_conn_request+0x5a/0x79
+[   49.858052]  tcp_rcv_state_process+0x98/0xdcc
+[   49.859990]  ? sk_filter_trim_cap+0x2f6/0x307
+[   49.862085]  tcp_v4_do_rcv+0xfc/0x145
+[   49.864055]  ? tcp_v4_do_rcv+0xfc/0x145
+[   49.866173]  tcp_v4_rcv+0x5ab/0xaf9
+[   49.868029]  ip_local_deliver_finish+0x1af/0x2e7
+[   49.870064]  ip_local_deliver+0x1b2/0x1c5
+[   49.871775]  ? inet_del_offload+0x45/0x45
+[   49.873916]  ip_rcv_finish+0x3f7/0x471
+[   49.875476]  ip_rcv+0x3f1/0x42f
+[   49.876991]  ? ip_local_deliver_finish+0x2e7/0x2e7
+[   49.878791]  __netif_receive_skb_core+0x6d3/0x950
+[   49.880701]  ? process_backlog+0x7e/0x216
+[   49.882589]  __netif_receive_skb+0x1d/0x5e
+[   49.884122]  process_backlog+0x10c/0x216
+[   49.885812]  net_rx_action+0x147/0x3df
+
+Fixes: a6ca7abe53633 ("tcp/dccp: fix lockdep splat in inet_csk_route_req()")
+Fixes: c92e8c02fe66 ("tcp/dccp: fix ireq->opt races")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: kernel test robot <fengguang.wu@intel.com>
+Reported-by: Maciej Żenczykowski <maze@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/inet_sock.h         |    6 ++++++
+ net/dccp/ipv4.c                 |    2 +-
+ net/ipv4/inet_connection_sock.c |    4 ++--
+ net/ipv4/tcp_ipv4.c             |    2 +-
+ 4 files changed, 10 insertions(+), 4 deletions(-)
+
+--- a/include/net/inet_sock.h
++++ b/include/net/inet_sock.h
+@@ -132,6 +132,12 @@ static inline int inet_request_bound_dev
+       return sk->sk_bound_dev_if;
+ }
++static inline struct ip_options_rcu *ireq_opt_deref(const struct inet_request_sock *ireq)
++{
++      return rcu_dereference_check(ireq->ireq_opt,
++                                   refcount_read(&ireq->req.rsk_refcnt) > 0);
++}
++
+ struct inet_cork {
+       unsigned int            flags;
+       __be32                  addr;
+--- a/net/dccp/ipv4.c
++++ b/net/dccp/ipv4.c
+@@ -495,7 +495,7 @@ static int dccp_v4_send_response(const s
+                                                             ireq->ir_rmt_addr);
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          rcu_dereference(ireq->ireq_opt));
++                                          ireq_opt_deref(ireq));
+               err = net_xmit_eval(err);
+       }
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -540,8 +540,8 @@ struct dst_entry *inet_csk_route_req(con
+       struct ip_options_rcu *opt;
+       struct rtable *rt;
+-      opt = rcu_dereference_protected(ireq->ireq_opt,
+-                                      refcount_read(&req->rsk_refcnt) > 0);
++      opt = ireq_opt_deref(ireq);
++
+       flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
+                          RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
+                          sk->sk_protocol, inet_sk_flowi_flags(sk),
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -878,7 +878,7 @@ static int tcp_v4_send_synack(const stru
+               err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
+                                           ireq->ir_rmt_addr,
+-                                          rcu_dereference(ireq->ireq_opt));
++                                          ireq_opt_deref(ireq));
+               err = net_xmit_eval(err);
+       }
diff --git a/queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch b/queue-4.13/tcp-fix-tcp_mtu_probe-vs-highest_sack.patch
new file mode 100644 (file)
index 0000000..2a18fbc
--- /dev/null
@@ -0,0 +1,81 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 30 Oct 2017 23:08:20 -0700
+Subject: tcp: fix tcp_mtu_probe() vs highest_sack
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 2b7cda9c35d3b940eb9ce74b30bbd5eb30db493d ]
+
+Based on SNMP values provided by Roman, Yuchung made the observation
+that some crashes in tcp_sacktag_walk() might be caused by MTU probing.
+
+Looking at tcp_mtu_probe(), I found that when a new skb was placed
+in front of the write queue, we were not updating tcp highest sack.
+
+If one skb is freed because all its content was copied to the new skb
+(for MTU probing), then tp->highest_sack could point to a now freed skb.
+
+Bad things would then happen, including infinite loops.
+
+This patch renames tcp_highest_sack_combine() and uses it
+from tcp_mtu_probe() to fix the bug.
+
+Note that I also removed one test against tp->sacked_out,
+since we want to replace tp->highest_sack regardless of whatever
+condition, since keeping a stale pointer to freed skb is a recipe
+for disaster.
+
+Fixes: a47e5a988a57 ("[TCP]: Convert highest_sack to sk_buff to allow direct access")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
+Reported-by: Roman Gushchin <guro@fb.com>
+Reported-by: Oleksandr Natalenko <oleksandr@natalenko.name>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/tcp.h     |    6 +++---
+ net/ipv4/tcp_output.c |    3 ++-
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1750,12 +1750,12 @@ static inline void tcp_highest_sack_rese
+       tcp_sk(sk)->highest_sack = tcp_write_queue_head(sk);
+ }
+-/* Called when old skb is about to be deleted (to be combined with new skb) */
+-static inline void tcp_highest_sack_combine(struct sock *sk,
++/* Called when old skb is about to be deleted and replaced by new skb */
++static inline void tcp_highest_sack_replace(struct sock *sk,
+                                           struct sk_buff *old,
+                                           struct sk_buff *new)
+ {
+-      if (tcp_sk(sk)->sacked_out && (old == tcp_sk(sk)->highest_sack))
++      if (old == tcp_highest_sack(sk))
+               tcp_sk(sk)->highest_sack = new;
+ }
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2094,6 +2094,7 @@ static int tcp_mtu_probe(struct sock *sk
+       nskb->ip_summed = skb->ip_summed;
+       tcp_insert_write_queue_before(nskb, skb, sk);
++      tcp_highest_sack_replace(sk, skb, nskb);
+       len = 0;
+       tcp_for_write_queue_from_safe(skb, next, sk) {
+@@ -2694,7 +2695,7 @@ static bool tcp_collapse_retrans(struct
+               else if (!skb_shift(skb, next_skb, next_skb_size))
+                       return false;
+       }
+-      tcp_highest_sack_combine(sk, next_skb, skb);
++      tcp_highest_sack_replace(sk, next_skb, skb);
+       tcp_unlink_write_queue(next_skb, sk);
diff --git a/queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch b/queue-4.13/tcp-refresh-tp-timestamp-before-tcp_mtu_probe.patch
new file mode 100644 (file)
index 0000000..00f4d28
--- /dev/null
@@ -0,0 +1,42 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Thu, 26 Oct 2017 21:21:40 -0700
+Subject: tcp: refresh tp timestamp before tcp_mtu_probe()
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit ee1836aec4f5a977c1699a311db4d9027ef21ac8 ]
+
+In the unlikely event tcp_mtu_probe() is sending a packet, we
+want tp->tcp_mstamp being as accurate as possible.
+
+This means we need to call tcp_mstamp_refresh() a bit earlier in
+tcp_write_xmit().
+
+Fixes: 385e20706fac ("tcp: use tp->tcp_mstamp in output path")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -2271,6 +2271,7 @@ static bool tcp_write_xmit(struct sock *
+       sent_pkts = 0;
++      tcp_mstamp_refresh(tp);
+       if (!push_one) {
+               /* Do MTU probing. */
+               result = tcp_mtu_probe(sk);
+@@ -2282,7 +2283,6 @@ static bool tcp_write_xmit(struct sock *
+       }
+       max_segs = tcp_tso_segs(sk, mss_now);
+-      tcp_mstamp_refresh(tp);
+       while ((skb = tcp_send_head(sk))) {
+               unsigned int limit;
diff --git a/queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch b/queue-4.13/tun-allow-positive-return-values-on-dev_get_valid_name-call.patch
new file mode 100644 (file)
index 0000000..c78a904
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Julien Gomes <julien@arista.com>
+Date: Wed, 25 Oct 2017 11:50:50 -0700
+Subject: tun: allow positive return values on dev_get_valid_name() call
+
+From: Julien Gomes <julien@arista.com>
+
+
+[ Upstream commit 5c25f65fd1e42685f7ccd80e0621829c105785d9 ]
+
+If the name argument of dev_get_valid_name() contains "%d", it will try
+to assign it a unit number in __dev_alloc_name() and return either the
+unit number (>= 0) or an error code (< 0).
+Considering positive values as error values prevents tun device creations
+relying on this mechanism, therefore we should only consider negative
+values as errors here.
+
+Signed-off-by: Julien Gomes <julien@arista.com>
+Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1814,7 +1814,7 @@ static int tun_set_iff(struct net *net,
+               if (!dev)
+                       return -ENOMEM;
+               err = dev_get_valid_name(net, dev, name);
+-              if (err)
++              if (err < 0)
+                       goto err_free_dev;
+               dev_net_set(dev, net);
diff --git a/queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch b/queue-4.13/tun-call-dev_get_valid_name-before-register_netdevice.patch
new file mode 100644 (file)
index 0000000..a8eda78
--- /dev/null
@@ -0,0 +1,82 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Cong Wang <xiyou.wangcong@gmail.com>
+Date: Fri, 13 Oct 2017 11:58:53 -0700
+Subject: tun: call dev_get_valid_name() before register_netdevice()
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+
+[ Upstream commit 0ad646c81b2182f7fa67ec0c8c825e0ee165696d ]
+
+register_netdevice() could fail early when we have an invalid
+dev name, in which case ->ndo_uninit() is not called. For tun
+device, this is a problem because a timer etc. are already
+initialized and it expects ->ndo_uninit() to clean them up.
+
+We could move these initializations into a ->ndo_init() so
+that register_netdevice() knows better, however this is still
+complicated due to the logic in tun_detach().
+
+Therefore, I choose to just call dev_get_valid_name() before
+register_netdevice(), which is quicker and much easier to audit.
+And for this specific case, it is already enough.
+
+Fixes: 96442e42429e ("tuntap: choose the txq based on rxq")
+Reported-by: Dmitry Alexeev <avekceeb@gmail.com>
+Cc: Jason Wang <jasowang@redhat.com>
+Cc: "Michael S. Tsirkin" <mst@redhat.com>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c         |    3 +++
+ include/linux/netdevice.h |    3 +++
+ net/core/dev.c            |    6 +++---
+ 3 files changed, 9 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1813,6 +1813,9 @@ static int tun_set_iff(struct net *net,
+               if (!dev)
+                       return -ENOMEM;
++              err = dev_get_valid_name(net, dev, name);
++              if (err)
++                      goto err_free_dev;
+               dev_net_set(dev, net);
+               dev->rtnl_link_ops = &tun_link_ops;
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3702,6 +3702,9 @@ struct net_device *alloc_netdev_mqs(int
+                                   unsigned char name_assign_type,
+                                   void (*setup)(struct net_device *),
+                                   unsigned int txqs, unsigned int rxqs);
++int dev_get_valid_name(struct net *net, struct net_device *dev,
++                     const char *name);
++
+ #define alloc_netdev(sizeof_priv, name, name_assign_type, setup) \
+       alloc_netdev_mqs(sizeof_priv, name, name_assign_type, setup, 1, 1)
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1146,9 +1146,8 @@ static int dev_alloc_name_ns(struct net
+       return ret;
+ }
+-static int dev_get_valid_name(struct net *net,
+-                            struct net_device *dev,
+-                            const char *name)
++int dev_get_valid_name(struct net *net, struct net_device *dev,
++                     const char *name)
+ {
+       BUG_ON(!net);
+@@ -1164,6 +1163,7 @@ static int dev_get_valid_name(struct net
+       return 0;
+ }
++EXPORT_SYMBOL(dev_get_valid_name);
+ /**
+  *    dev_change_name - change name of a device
diff --git a/queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch b/queue-4.13/tun-tap-sanitize-tunsetsndbuf-input.patch
new file mode 100644 (file)
index 0000000..4c53d4d
--- /dev/null
@@ -0,0 +1,88 @@
+From foo@baz Wed Nov 15 17:25:34 CET 2017
+From: Craig Gallek <kraig@google.com>
+Date: Mon, 30 Oct 2017 18:50:11 -0400
+Subject: tun/tap: sanitize TUNSETSNDBUF input
+
+From: Craig Gallek <kraig@google.com>
+
+
+[ Upstream commit 93161922c658c714715686cd0cf69b090cb9bf1d ]
+
+Syzkaller found several variants of the lockup below by setting negative
+values with the TUNSETSNDBUF ioctl.  This patch adds a sanity check
+to both the tun and tap versions of this ioctl.
+
+  watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [repro:2389]
+  Modules linked in:
+  irq event stamp: 329692056
+  hardirqs last  enabled at (329692055): [<ffffffff824b8381>] _raw_spin_unlock_irqrestore+0x31/0x75
+  hardirqs last disabled at (329692056): [<ffffffff824b9e58>] apic_timer_interrupt+0x98/0xb0
+  softirqs last  enabled at (35659740): [<ffffffff824bc958>] __do_softirq+0x328/0x48c
+  softirqs last disabled at (35659731): [<ffffffff811c796c>] irq_exit+0xbc/0xd0
+  CPU: 0 PID: 2389 Comm: repro Not tainted 4.14.0-rc7 #23
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+  task: ffff880009452140 task.stack: ffff880006a20000
+  RIP: 0010:_raw_spin_lock_irqsave+0x11/0x80
+  RSP: 0018:ffff880006a27c50 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff10
+  RAX: ffff880009ac68d0 RBX: ffff880006a27ce0 RCX: 0000000000000000
+  RDX: 0000000000000001 RSI: ffff880006a27ce0 RDI: ffff880009ac6900
+  RBP: ffff880006a27c60 R08: 0000000000000000 R09: 0000000000000000
+  R10: 0000000000000001 R11: 000000000063ff00 R12: ffff880009ac6900
+  R13: ffff880006a27cf8 R14: 0000000000000001 R15: ffff880006a27cf8
+  FS:  00007f4be4838700(0000) GS:ffff88000cc00000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000020101000 CR3: 0000000009616000 CR4: 00000000000006f0
+  Call Trace:
+   prepare_to_wait+0x26/0xc0
+   sock_alloc_send_pskb+0x14e/0x270
+   ? remove_wait_queue+0x60/0x60
+   tun_get_user+0x2cc/0x19d0
+   ? __tun_get+0x60/0x1b0
+   tun_chr_write_iter+0x57/0x86
+   __vfs_write+0x156/0x1e0
+   vfs_write+0xf7/0x230
+   SyS_write+0x57/0xd0
+   entry_SYSCALL_64_fastpath+0x1f/0xbe
+  RIP: 0033:0x7f4be4356df9
+  RSP: 002b:00007ffc18101c08 EFLAGS: 00000293 ORIG_RAX: 0000000000000001
+  RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f4be4356df9
+  RDX: 0000000000000046 RSI: 0000000020101000 RDI: 0000000000000005
+  RBP: 00007ffc18101c40 R08: 0000000000000001 R09: 0000000000000001
+  R10: 0000000000000001 R11: 0000000000000293 R12: 0000559c75f64780
+  R13: 00007ffc18101d30 R14: 0000000000000000 R15: 0000000000000000
+
+Fixes: 33dccbb050bb ("tun: Limit amount of queued packets per device")
+Fixes: 20d29d7a916a ("net: macvtap driver")
+Signed-off-by: Craig Gallek <kraig@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tap.c |    2 ++
+ drivers/net/tun.c |    4 ++++
+ 2 files changed, 6 insertions(+)
+
+--- a/drivers/net/tap.c
++++ b/drivers/net/tap.c
+@@ -1035,6 +1035,8 @@ static long tap_ioctl(struct file *file,
+       case TUNSETSNDBUF:
+               if (get_user(s, sp))
+                       return -EFAULT;
++              if (s <= 0)
++                      return -EINVAL;
+               q->sk.sk_sndbuf = s;
+               return 0;
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -2219,6 +2219,10 @@ static long __tun_chr_ioctl(struct file
+                       ret = -EFAULT;
+                       break;
+               }
++              if (sndbuf <= 0) {
++                      ret = -EINVAL;
++                      break;
++              }
+               tun->sndbuf = sndbuf;
+               tun_set_sndbuf(tun);