git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
4.9-stable patches
author: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 Oct 2017 14:09:51 +0000 (16:09 +0200)
committer: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 10 Oct 2017 14:09:51 +0000 (16:09 +0200)
added patches:
bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch
bpf-verifier-reject-bpf_alu64-bpf_end.patch
ip6_gre-ip6gre_tap-device-should-keep-dst.patch
ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch
ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch
ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch
isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch
l2tp-avoid-schedule-while-atomic-in-exit_net.patch
l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch
mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch
net-dsa-fix-network-device-registration-order.patch
net-emac-fix-napi-poll-list-corruption.patch
net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch
net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch
net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch
net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch
net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch
net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch
net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch
netlink-do-not-proceed-if-dump-s-start-errs.patch
packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch
packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch
packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch
sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch
socket-bpf-fix-possible-use-after-free.patch
tcp-fastopen-fix-on-syn-data-transmit-failure.patch
tcp-fix-data-delivery-rate.patch
tcp-update-skb-skb_mstamp-more-carefully.patch
tipc-use-only-positive-error-codes-in-messages.patch
tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch
udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch
vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch

33 files changed:
queue-4.9/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch [new file with mode: 0644]
queue-4.9/bpf-verifier-reject-bpf_alu64-bpf_end.patch [new file with mode: 0644]
queue-4.9/ip6_gre-ip6gre_tap-device-should-keep-dst.patch [new file with mode: 0644]
queue-4.9/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch [new file with mode: 0644]
queue-4.9/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch [new file with mode: 0644]
queue-4.9/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch [new file with mode: 0644]
queue-4.9/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch [new file with mode: 0644]
queue-4.9/l2tp-avoid-schedule-while-atomic-in-exit_net.patch [new file with mode: 0644]
queue-4.9/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch [new file with mode: 0644]
queue-4.9/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch [new file with mode: 0644]
queue-4.9/net-dsa-fix-network-device-registration-order.patch [new file with mode: 0644]
queue-4.9/net-emac-fix-napi-poll-list-corruption.patch [new file with mode: 0644]
queue-4.9/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch [new file with mode: 0644]
queue-4.9/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch [new file with mode: 0644]
queue-4.9/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch [new file with mode: 0644]
queue-4.9/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch [new file with mode: 0644]
queue-4.9/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch [new file with mode: 0644]
queue-4.9/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch [new file with mode: 0644]
queue-4.9/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch [new file with mode: 0644]
queue-4.9/netlink-do-not-proceed-if-dump-s-start-errs.patch [new file with mode: 0644]
queue-4.9/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch [new file with mode: 0644]
queue-4.9/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch [new file with mode: 0644]
queue-4.9/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch [new file with mode: 0644]
queue-4.9/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch [new file with mode: 0644]
queue-4.9/series
queue-4.9/socket-bpf-fix-possible-use-after-free.patch [new file with mode: 0644]
queue-4.9/tcp-fastopen-fix-on-syn-data-transmit-failure.patch [new file with mode: 0644]
queue-4.9/tcp-fix-data-delivery-rate.patch [new file with mode: 0644]
queue-4.9/tcp-update-skb-skb_mstamp-more-carefully.patch [new file with mode: 0644]
queue-4.9/tipc-use-only-positive-error-codes-in-messages.patch [new file with mode: 0644]
queue-4.9/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch [new file with mode: 0644]
queue-4.9/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch [new file with mode: 0644]
queue-4.9/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch [new file with mode: 0644]

diff --git a/queue-4.9/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch b/queue-4.9/bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch
new file mode 100644 (file)
index 0000000..2f96288
--- /dev/null
@@ -0,0 +1,64 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Yonghong Song <yhs@fb.com>
+Date: Mon, 18 Sep 2017 16:38:36 -0700
+Subject: bpf: one perf event close won't free bpf program attached by another perf event
+
+From: Yonghong Song <yhs@fb.com>
+
+
+[ Upstream commit ec9dd352d591f0c90402ec67a317c1ed4fb2e638 ]
+
+This patch fixes a bug exhibited by the following scenario:
+  1. fd1 = perf_event_open with attr.config = ID1
+  2. attach bpf program prog1 to fd1
+  3. fd2 = perf_event_open with attr.config = ID1
+     <this will be successful>
+  4. user program closes fd2 and prog1 is detached from the tracepoint.
+  5. user program with fd1 does not work properly as the tracepoint
+     produces no output any more.
+
+The issue happens at step 4. Multiple perf_event_open can be called
+successfully, but only one bpf prog pointer in the tp_event. In the
+current logic, any fd release for the same tp_event will free
+the tp_event->prog.
+
+The fix is to free tp_event->prog only when the closing fd
+corresponds to the one which registered the program.
+
+Signed-off-by: Yonghong Song <yhs@fb.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/linux/trace_events.h |    1 +
+ kernel/events/core.c         |    3 ++-
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -273,6 +273,7 @@ struct trace_event_call {
+       int                             perf_refcount;
+       struct hlist_head __percpu      *perf_events;
+       struct bpf_prog                 *prog;
++      struct perf_event               *bpf_prog_owner;
+       int     (*perf_perm)(struct trace_event_call *,
+                            struct perf_event *);
+--- a/kernel/events/core.c
++++ b/kernel/events/core.c
+@@ -7871,6 +7871,7 @@ static int perf_event_set_bpf_prog(struc
+               }
+       }
+       event->tp_event->prog = prog;
++      event->tp_event->bpf_prog_owner = event;
+       return 0;
+ }
+@@ -7885,7 +7886,7 @@ static void perf_event_free_bpf_prog(str
+               return;
+       prog = event->tp_event->prog;
+-      if (prog) {
++      if (prog && event->tp_event->bpf_prog_owner == event) {
+               event->tp_event->prog = NULL;
+               bpf_prog_put(prog);
+       }
diff --git a/queue-4.9/bpf-verifier-reject-bpf_alu64-bpf_end.patch b/queue-4.9/bpf-verifier-reject-bpf_alu64-bpf_end.patch
new file mode 100644 (file)
index 0000000..3376b95
--- /dev/null
@@ -0,0 +1,35 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Edward Cree <ecree@solarflare.com>
+Date: Fri, 15 Sep 2017 14:37:38 +0100
+Subject: bpf/verifier: reject BPF_ALU64|BPF_END
+
+From: Edward Cree <ecree@solarflare.com>
+
+
+[ Upstream commit e67b8a685c7c984e834e3181ef4619cd7025a136 ]
+
+Neither ___bpf_prog_run nor the JITs accept it.
+Also adds a new test case.
+
+Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)")
+Signed-off-by: Edward Cree <ecree@solarflare.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/verifier.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -1720,7 +1720,8 @@ static int check_alu_op(struct bpf_verif
+                       }
+               } else {
+                       if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
+-                          (insn->imm != 16 && insn->imm != 32 && insn->imm != 64)) {
++                          (insn->imm != 16 && insn->imm != 32 && insn->imm != 64) ||
++                          BPF_CLASS(insn->code) == BPF_ALU64) {
+                               verbose("BPF_END uses reserved fields\n");
+                               return -EINVAL;
+                       }
diff --git a/queue-4.9/ip6_gre-ip6gre_tap-device-should-keep-dst.patch b/queue-4.9/ip6_gre-ip6gre_tap-device-should-keep-dst.patch
new file mode 100644 (file)
index 0000000..1e7ee9c
--- /dev/null
@@ -0,0 +1,32 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 28 Sep 2017 13:23:50 +0800
+Subject: ip6_gre: ip6gre_tap device should keep dst
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 2d40557cc702ed8e5edd9bd422233f86652d932e ]
+
+The patch 'ip_gre: ipgre_tap device should keep dst' fixed
+an issue that ipgre_tap mtu couldn't be updated in tx path.
+
+The same fix is needed for ip6gre_tap as well.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -1298,6 +1298,7 @@ static void ip6gre_tap_setup(struct net_
+       dev->features |= NETIF_F_NETNS_LOCAL;
+       dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+       dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
++      netif_keep_dst(dev);
+ }
+ static bool ip6gre_netlink_encap_parms(struct nlattr *data[],
diff --git a/queue-4.9/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch b/queue-4.9/ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch
new file mode 100644 (file)
index 0000000..e07f017
--- /dev/null
@@ -0,0 +1,76 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 15 Sep 2017 12:00:07 +0800
+Subject: ip6_gre: skb_push ipv6hdr before packing the header in ip6gre_header
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 76cc0d3282d4b933fa144fa41fbc5318e0fdca24 ]
+
+Now in ip6gre_header before packing the ipv6 header, it skb_push t->hlen
+which only includes encap_hlen + tun_hlen. It means greh and inner header
+would be overwritten by ipv6 stuff and ipv6h might have no chance to be set
+up.
+
+Jianlin found this issue when using remote any on ip6_gre, the packets he
+captured on gre dev are truncated:
+
+22:50:26.210866 Out ethertype IPv6 (0x86dd), length 120: truncated-ip6 -\
+8128 bytes missing!(flowlabel 0x92f40, hlim 0, next-header Options (0)  \
+payload length: 8192) ::1:2000:0 > ::1:0:86dd: HBH [trunc] ip-proto-128 \
+8184
+
+It should also skb_push ipv6hdr so that ipv6h points to the right position
+to set ipv6 stuff up.
+
+This patch is to skb_push hlen + sizeof(*ipv6h) and also fix some indents
+in ip6gre_header.
+
+Fixes: c12b395a4664 ("gre: Support GRE over IPv6")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_gre.c |   21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -936,24 +936,25 @@ done:
+ }
+ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev,
+-                      unsigned short type,
+-                      const void *daddr, const void *saddr, unsigned int len)
++                       unsigned short type, const void *daddr,
++                       const void *saddr, unsigned int len)
+ {
+       struct ip6_tnl *t = netdev_priv(dev);
+-      struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen);
+-      __be16 *p = (__be16 *)(ipv6h+1);
++      struct ipv6hdr *ipv6h;
++      __be16 *p;
+-      ip6_flow_hdr(ipv6h, 0,
+-                   ip6_make_flowlabel(dev_net(dev), skb,
+-                                      t->fl.u.ip6.flowlabel, true,
+-                                      &t->fl.u.ip6));
++      ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen + sizeof(*ipv6h));
++      ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb,
++                                                t->fl.u.ip6.flowlabel,
++                                                true, &t->fl.u.ip6));
+       ipv6h->hop_limit = t->parms.hop_limit;
+       ipv6h->nexthdr = NEXTHDR_GRE;
+       ipv6h->saddr = t->parms.laddr;
+       ipv6h->daddr = t->parms.raddr;
+-      p[0]            = t->parms.o_flags;
+-      p[1]            = htons(type);
++      p = (__be16 *)(ipv6h + 1);
++      p[0] = t->parms.o_flags;
++      p[1] = htons(type);
+       /*
+        *      Set the source hardware address.
diff --git a/queue-4.9/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch b/queue-4.9/ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch
new file mode 100644 (file)
index 0000000..21a6b8c
--- /dev/null
@@ -0,0 +1,61 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Fri, 15 Sep 2017 15:58:33 +0800
+Subject: ip6_tunnel: do not allow loading ip6_tunnel if ipv6 is disabled in cmdline
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit 8c22dab03ad072e45060c299c70d02a4f6fc4aab ]
+
+If ipv6 has been disabled from cmdline since kernel started, it makes
+no sense to allow users to create any ip6 tunnel. Otherwise, it could
+cause some potential problems.
+
+Jianlin found a kernel crash caused by this in ip6_gre when he set
+ipv6.disable=1 in grub:
+
+[  209.588865] Unable to handle kernel paging request for data at address 0x00000080
+[  209.588872] Faulting instruction address: 0xc000000000a3aa6c
+[  209.588879] Oops: Kernel access of bad area, sig: 11 [#1]
+[  209.589062] NIP [c000000000a3aa6c] fib_rules_lookup+0x4c/0x260
+[  209.589071] LR [c000000000b9ad90] fib6_rule_lookup+0x50/0xb0
+[  209.589076] Call Trace:
+[  209.589097] fib6_rule_lookup+0x50/0xb0
+[  209.589106] rt6_lookup+0xc4/0x110
+[  209.589116] ip6gre_tnl_link_config+0x214/0x2f0 [ip6_gre]
+[  209.589125] ip6gre_newlink+0x138/0x3a0 [ip6_gre]
+[  209.589134] rtnl_newlink+0x798/0xb80
+[  209.589142] rtnetlink_rcv_msg+0xec/0x390
+[  209.589151] netlink_rcv_skb+0x138/0x150
+[  209.589159] rtnetlink_rcv+0x48/0x70
+[  209.589169] netlink_unicast+0x538/0x640
+[  209.589175] netlink_sendmsg+0x40c/0x480
+[  209.589184] ___sys_sendmsg+0x384/0x4e0
+[  209.589194] SyS_sendmsg+0xd4/0x140
+[  209.589201] SyS_socketcall+0x3e0/0x4f0
+[  209.589209] system_call+0x38/0xe0
+
+This patch is to return -EOPNOTSUPP in ip6_tunnel_init if ipv6 has been
+disabled from cmdline.
+
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -2231,6 +2231,9 @@ static int __init ip6_tunnel_init(void)
+ {
+       int  err;
++      if (!ipv6_mod_enabled())
++              return -EOPNOTSUPP;
++
+       err = register_pernet_device(&ip6_tnl_net_ops);
+       if (err < 0)
+               goto out_pernet;
diff --git a/queue-4.9/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch b/queue-4.9/ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch
new file mode 100644 (file)
index 0000000..78bd1a1
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Xin Long <lucien.xin@gmail.com>
+Date: Thu, 28 Sep 2017 13:24:07 +0800
+Subject: ip6_tunnel: update mtu properly for ARPHRD_ETHER tunnel device in tx path
+
+From: Xin Long <lucien.xin@gmail.com>
+
+
+[ Upstream commit d41bb33ba33b8f8debe54ed36be6925eb496e354 ]
+
+Now when updating mtu in tx path, it doesn't consider ARPHRD_ETHER tunnel
+device, like ip6gre_tap tunnel, for which it should also subtract ether
+header to get the correct mtu.
+
+Signed-off-by: Xin Long <lucien.xin@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/ip6_tunnel.c |    5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+--- a/net/ipv6/ip6_tunnel.c
++++ b/net/ipv6/ip6_tunnel.c
+@@ -1042,6 +1042,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, st
+       struct dst_entry *dst = NULL, *ndst = NULL;
+       struct net_device *tdev;
+       int mtu;
++      unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0;
+       unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen;
+       unsigned int max_headroom = psh_hlen;
+       bool use_cache = false;
+@@ -1120,7 +1121,7 @@ route_lookup:
+                                    t->parms.name);
+               goto tx_err_dst_release;
+       }
+-      mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen;
++      mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen;
+       if (encap_limit >= 0) {
+               max_headroom += 8;
+               mtu -= 8;
+@@ -1129,7 +1130,7 @@ route_lookup:
+               mtu = IPV6_MIN_MTU;
+       if (skb_dst(skb) && !t->parms.collect_md)
+               skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
+-      if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) {
++      if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) {
+               *pmtu = mtu;
+               err = -EMSGSIZE;
+               goto tx_err_dst_release;
diff --git a/queue-4.9/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch b/queue-4.9/isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch
new file mode 100644 (file)
index 0000000..3238145
--- /dev/null
@@ -0,0 +1,104 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Meng Xu <mengxu.gatech@gmail.com>
+Date: Tue, 19 Sep 2017 21:49:55 -0400
+Subject: isdn/i4l: fetch the ppp_write buffer in one shot
+
+From: Meng Xu <mengxu.gatech@gmail.com>
+
+
+[ Upstream commit 02388bf87f72e1d47174cd8f81c34443920eb5a0 ]
+
+In isdn_ppp_write(), the header (i.e., protobuf) of the buffer is
+fetched twice from userspace. The first fetch is used to peek at the
+protocol of the message and reset the huptimer if necessary; while the
+second fetch copies in the whole buffer. However, given that buf resides
+in userspace memory, a user process can race to change its memory content
+across fetches. By doing so, we can either avoid resetting the huptimer
+for any type of packets (by first setting proto to PPP_LCP and later
+change to the actual type) or force resetting the huptimer for LCP
+packets.
+
+This patch changes this double-fetch behavior into two single fetches
+decided by condition (lp->isdn_device < 0 || lp->isdn_channel < 0).
+A more detailed discussion can be found at
+https://marc.info/?l=linux-kernel&m=150586376926123&w=2
+
+Signed-off-by: Meng Xu <mengxu.gatech@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/isdn/i4l/isdn_ppp.c |   37 +++++++++++++++++++++++++------------
+ 1 file changed, 25 insertions(+), 12 deletions(-)
+
+--- a/drivers/isdn/i4l/isdn_ppp.c
++++ b/drivers/isdn/i4l/isdn_ppp.c
+@@ -828,7 +828,6 @@ isdn_ppp_write(int min, struct file *fil
+       isdn_net_local *lp;
+       struct ippp_struct *is;
+       int proto;
+-      unsigned char protobuf[4];
+       is = file->private_data;
+@@ -842,24 +841,28 @@ isdn_ppp_write(int min, struct file *fil
+       if (!lp)
+               printk(KERN_DEBUG "isdn_ppp_write: lp == NULL\n");
+       else {
+-              /*
+-               * Don't reset huptimer for
+-               * LCP packets. (Echo requests).
+-               */
+-              if (copy_from_user(protobuf, buf, 4))
+-                      return -EFAULT;
+-              proto = PPP_PROTOCOL(protobuf);
+-              if (proto != PPP_LCP)
+-                      lp->huptimer = 0;
++              if (lp->isdn_device < 0 || lp->isdn_channel < 0) {
++                      unsigned char protobuf[4];
++                      /*
++                       * Don't reset huptimer for
++                       * LCP packets. (Echo requests).
++                       */
++                      if (copy_from_user(protobuf, buf, 4))
++                              return -EFAULT;
++
++                      proto = PPP_PROTOCOL(protobuf);
++                      if (proto != PPP_LCP)
++                              lp->huptimer = 0;
+-              if (lp->isdn_device < 0 || lp->isdn_channel < 0)
+                       return 0;
++              }
+               if ((dev->drv[lp->isdn_device]->flags & DRV_FLAG_RUNNING) &&
+                   lp->dialstate == 0 &&
+                   (lp->flags & ISDN_NET_CONNECTED)) {
+                       unsigned short hl;
+                       struct sk_buff *skb;
++                      unsigned char *cpy_buf;
+                       /*
+                        * we need to reserve enough space in front of
+                        * sk_buff. old call to dev_alloc_skb only reserved
+@@ -872,11 +875,21 @@ isdn_ppp_write(int min, struct file *fil
+                               return count;
+                       }
+                       skb_reserve(skb, hl);
+-                      if (copy_from_user(skb_put(skb, count), buf, count))
++                      cpy_buf = skb_put(skb, count);
++                      if (copy_from_user(cpy_buf, buf, count))
+                       {
+                               kfree_skb(skb);
+                               return -EFAULT;
+                       }
++
++                      /*
++                       * Don't reset huptimer for
++                       * LCP packets. (Echo requests).
++                       */
++                      proto = PPP_PROTOCOL(cpy_buf);
++                      if (proto != PPP_LCP)
++                              lp->huptimer = 0;
++
+                       if (is->debug & 0x40) {
+                               printk(KERN_DEBUG "ppp xmit: len %d\n", (int) skb->len);
+                               isdn_ppp_frame_log("xmit", skb->data, skb->len, 32, is->unit, lp->ppp_slot);
diff --git a/queue-4.9/l2tp-avoid-schedule-while-atomic-in-exit_net.patch b/queue-4.9/l2tp-avoid-schedule-while-atomic-in-exit_net.patch
new file mode 100644 (file)
index 0000000..6fe7ff8
--- /dev/null
@@ -0,0 +1,100 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Ridge Kennedy <ridge.kennedy@alliedtelesis.co.nz>
+Date: Wed, 22 Feb 2017 14:59:49 +1300
+Subject: l2tp: Avoid schedule while atomic in exit_net
+
+From: Ridge Kennedy <ridge.kennedy@alliedtelesis.co.nz>
+
+
+[ Upstream commit 12d656af4e3d2781b9b9f52538593e1717e7c979 ]
+
+While destroying a network namespace that contains a L2TP tunnel a
+"BUG: scheduling while atomic" can be observed.
+
+Enabling lockdep shows that this is happening because l2tp_exit_net()
+is calling l2tp_tunnel_closeall() (via l2tp_tunnel_delete()) from
+within an RCU critical section.
+
+l2tp_exit_net() takes rcu_read_lock_bh()
+  << list_for_each_entry_rcu() >>
+  l2tp_tunnel_delete()
+    l2tp_tunnel_closeall()
+      __l2tp_session_unhash()
+        synchronize_rcu() << Illegal inside RCU critical section >>
+
+BUG: sleeping function called from invalid context
+in_atomic(): 1, irqs_disabled(): 0, pid: 86, name: kworker/u16:2
+INFO: lockdep is turned off.
+CPU: 2 PID: 86 Comm: kworker/u16:2 Tainted: G        W  O    4.4.6-at1 #2
+Hardware name: Xen HVM domU, BIOS 4.6.1-xs125300 05/09/2016
+Workqueue: netns cleanup_net
+ 0000000000000000 ffff880202417b90 ffffffff812b0013 ffff880202410ac0
+ ffffffff81870de8 ffff880202417bb8 ffffffff8107aee8 ffffffff81870de8
+ 0000000000000c51 0000000000000000 ffff880202417be0 ffffffff8107b024
+Call Trace:
+ [<ffffffff812b0013>] dump_stack+0x85/0xc2
+ [<ffffffff8107aee8>] ___might_sleep+0x148/0x240
+ [<ffffffff8107b024>] __might_sleep+0x44/0x80
+ [<ffffffff810b21bd>] synchronize_sched+0x2d/0xe0
+ [<ffffffff8109be6d>] ? trace_hardirqs_on+0xd/0x10
+ [<ffffffff8105c7bb>] ? __local_bh_enable_ip+0x6b/0xc0
+ [<ffffffff816a1b00>] ? _raw_spin_unlock_bh+0x30/0x40
+ [<ffffffff81667482>] __l2tp_session_unhash+0x172/0x220
+ [<ffffffff81667397>] ? __l2tp_session_unhash+0x87/0x220
+ [<ffffffff8166888b>] l2tp_tunnel_closeall+0x9b/0x140
+ [<ffffffff81668c74>] l2tp_tunnel_delete+0x14/0x60
+ [<ffffffff81668dd0>] l2tp_exit_net+0x110/0x270
+ [<ffffffff81668d5c>] ? l2tp_exit_net+0x9c/0x270
+ [<ffffffff815001c3>] ops_exit_list.isra.6+0x33/0x60
+ [<ffffffff81501166>] cleanup_net+0x1b6/0x280
+ ...
+
+This bug can easily be reproduced with a few steps:
+
+ $ sudo unshare -n bash  # Create a shell in a new namespace
+ # ip link set lo up
+ # ip addr add 127.0.0.1 dev lo
+ # ip l2tp add tunnel remote 127.0.0.1 local 127.0.0.1 tunnel_id 1 \
+    peer_tunnel_id 1 udp_sport 50000 udp_dport 50000
+ # ip l2tp add session name foo tunnel_id 1 session_id 1 \
+    peer_session_id 1
+ # ip link set foo up
+ # exit  # Exit the shell, in turn exiting the namespace
+ $ dmesg
+ ...
+ [942121.089216] BUG: scheduling while atomic: kworker/u16:3/13872/0x00000200
+ ...
+
+To fix this, move the call to l2tp_tunnel_closeall() out of the RCU
+critical section, and instead call it from l2tp_tunnel_del_work(), which
+is running from the l2tp_wq workqueue.
+
+Fixes: 2b551c6e7d5b ("l2tp: close sessions before initiating tunnel delete")
+Signed-off-by: Ridge Kennedy <ridge.kennedy@alliedtelesis.co.nz>
+Acked-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1415,6 +1415,9 @@ static void l2tp_tunnel_del_work(struct
+       struct sock *sk = NULL;
+       tunnel = container_of(work, struct l2tp_tunnel, del_work);
++
++      l2tp_tunnel_closeall(tunnel);
++
+       sk = l2tp_tunnel_sock_lookup(tunnel);
+       if (!sk)
+               goto out;
+@@ -1737,7 +1740,6 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+ int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+ {
+       l2tp_tunnel_inc_refcount(tunnel);
+-      l2tp_tunnel_closeall(tunnel);
+       if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
+               l2tp_tunnel_dec_refcount(tunnel);
+               return 1;
diff --git a/queue-4.9/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch b/queue-4.9/l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch
new file mode 100644 (file)
index 0000000..38b0c4c
--- /dev/null
@@ -0,0 +1,85 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Sabrina Dubroca <sd@queasysnail.net>
+Date: Tue, 26 Sep 2017 16:16:43 +0200
+Subject: l2tp: fix race condition in l2tp_tunnel_delete
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+
+[ Upstream commit 62b982eeb4589b2e6d7c01a90590e3a4c2b2ca19 ]
+
+If we try to delete the same tunnel twice, the first delete operation
+does a lookup (l2tp_tunnel_get), finds the tunnel, calls
+l2tp_tunnel_delete, which queues it for deletion by
+l2tp_tunnel_del_work.
+
+The second delete operation also finds the tunnel and calls
+l2tp_tunnel_delete. If the workqueue has already fired and started
+running l2tp_tunnel_del_work, then l2tp_tunnel_delete will queue the
+same tunnel a second time, and try to free the socket again.
+
+Add a dead flag to prevent firing the workqueue twice. Then we can
+remove the check of queue_work's result that was meant to prevent that
+race but doesn't.
+
+Reproducer:
+
+    ip l2tp add tunnel tunnel_id 3000 peer_tunnel_id 4000 local 192.168.0.2 remote 192.168.0.1 encap udp udp_sport 5000 udp_dport 6000
+    ip l2tp add session name l2tp1 tunnel_id 3000 session_id 1000 peer_session_id 2000
+    ip link set l2tp1 up
+    ip l2tp del tunnel tunnel_id 3000
+    ip l2tp del tunnel tunnel_id 3000
+
+Fixes: f8ccac0e4493 ("l2tp: put tunnel socket release on a workqueue")
+Reported-by: Jianlin Shi <jishi@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Acked-by: Guillaume Nault <g.nault@alphalink.fr>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/l2tp/l2tp_core.c |   10 ++++------
+ net/l2tp/l2tp_core.h |    5 ++++-
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+--- a/net/l2tp/l2tp_core.c
++++ b/net/l2tp/l2tp_core.c
+@@ -1737,14 +1737,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+ /* This function is used by the netlink TUNNEL_DELETE command.
+  */
+-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+ {
+-      l2tp_tunnel_inc_refcount(tunnel);
+-      if (false == queue_work(l2tp_wq, &tunnel->del_work)) {
+-              l2tp_tunnel_dec_refcount(tunnel);
+-              return 1;
++      if (!test_and_set_bit(0, &tunnel->dead)) {
++              l2tp_tunnel_inc_refcount(tunnel);
++              queue_work(l2tp_wq, &tunnel->del_work);
+       }
+-      return 0;
+ }
+ EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+--- a/net/l2tp/l2tp_core.h
++++ b/net/l2tp/l2tp_core.h
+@@ -169,6 +169,9 @@ struct l2tp_tunnel_cfg {
+ struct l2tp_tunnel {
+       int                     magic;          /* Should be L2TP_TUNNEL_MAGIC */
++
++      unsigned long           dead;
++
+       struct rcu_head rcu;
+       rwlock_t                hlist_lock;     /* protect session_hlist */
+       struct hlist_head       session_hlist[L2TP_HASH_SIZE];
+@@ -257,7 +260,7 @@ int l2tp_tunnel_create(struct net *net,
+                      u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg,
+                      struct l2tp_tunnel **tunnelp);
+ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+-int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
++void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
+ struct l2tp_session *l2tp_session_create(int priv_size,
+                                        struct l2tp_tunnel *tunnel,
+                                        u32 session_id, u32 peer_session_id,
diff --git a/queue-4.9/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch b/queue-4.9/mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch
new file mode 100644 (file)
index 0000000..798205d
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Yuval Mintz <yuvalm@mellanox.com>
+Date: Tue, 12 Sep 2017 08:50:53 +0200
+Subject: mlxsw: spectrum: Prevent mirred-related crash on removal
+
+From: Yuval Mintz <yuvalm@mellanox.com>
+
+
+[ Upstream commit 6399ebcccffa12e65bc15eda039d37673264ebce ]
+
+When removing the offloading of mirred actions under
+matchall classifiers, mlxsw would find the destination port
+associated with the offloaded action and utilize it for undoing
+the configuration.
+
+Depending on the order in which ports are removed, it's possible that
+the destination port would get removed before the source port.
+In such a scenario, when actions would be flushed for the source port
+mlxsw would perform an illegal dereference as the destination port is
+no longer listed.
+
+Since the only item necessary for undoing the configuration on the
+destination side is the port-id and that in turn is already maintained
+by mlxsw on the source-port, simply stop trying to access the
+destination port and use the port-id directly instead.
+
+Fixes: 763b4b70af ("mlxsw: spectrum: Add support in matchall mirror TC offloading")
+Signed-off-by: Yuval Mintz <yuvalm@mellanox.com>
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/mellanox/mlxsw/spectrum.c |   20 ++++++++++----------
+ 1 file changed, 10 insertions(+), 10 deletions(-)
+
+--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
++++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+@@ -249,15 +249,14 @@ static void mlxsw_sp_span_entry_destroy(
+ }
+ static struct mlxsw_sp_span_entry *
+-mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port)
++mlxsw_sp_span_entry_find(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+ {
+-      struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+       int i;
+       for (i = 0; i < mlxsw_sp->span.entries_count; i++) {
+               struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span.entries[i];
+-              if (curr->used && curr->local_port == port->local_port)
++              if (curr->used && curr->local_port == local_port)
+                       return curr;
+       }
+       return NULL;
+@@ -268,7 +267,8 @@ static struct mlxsw_sp_span_entry
+ {
+       struct mlxsw_sp_span_entry *span_entry;
+-      span_entry = mlxsw_sp_span_entry_find(port);
++      span_entry = mlxsw_sp_span_entry_find(port->mlxsw_sp,
++                                            port->local_port);
+       if (span_entry) {
+               /* Already exists, just take a reference */
+               span_entry->ref_count++;
+@@ -453,12 +453,13 @@ err_port_bind:
+ }
+ static void mlxsw_sp_span_mirror_remove(struct mlxsw_sp_port *from,
+-                                      struct mlxsw_sp_port *to,
++                                      u8 destination_port,
+                                       enum mlxsw_sp_span_type type)
+ {
+       struct mlxsw_sp_span_entry *span_entry;
+-      span_entry = mlxsw_sp_span_entry_find(to);
++      span_entry = mlxsw_sp_span_entry_find(from->mlxsw_sp,
++                                            destination_port);
+       if (!span_entry) {
+               netdev_err(from->dev, "no span entry found\n");
+               return;
+@@ -1255,10 +1256,8 @@ static int mlxsw_sp_port_add_cls_matchal
+ static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
+                                          struct tc_cls_matchall_offload *cls)
+ {
+-      struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+       struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
+       enum mlxsw_sp_span_type span_type;
+-      struct mlxsw_sp_port *to_port;
+       mall_tc_entry = mlxsw_sp_port_mirror_entry_find(mlxsw_sp_port,
+                                                       cls->cookie);
+@@ -1269,11 +1268,12 @@ static void mlxsw_sp_port_del_cls_matcha
+       switch (mall_tc_entry->type) {
+       case MLXSW_SP_PORT_MALL_MIRROR:
+-              to_port = mlxsw_sp->ports[mall_tc_entry->mirror.to_local_port];
+               span_type = mall_tc_entry->mirror.ingress ?
+                               MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+-              mlxsw_sp_span_mirror_remove(mlxsw_sp_port, to_port, span_type);
++              mlxsw_sp_span_mirror_remove(mlxsw_sp_port,
++                                          mall_tc_entry->mirror.to_local_port,
++                                          span_type);
+               break;
+       default:
+               WARN_ON(1);
diff --git a/queue-4.9/net-dsa-fix-network-device-registration-order.patch b/queue-4.9/net-dsa-fix-network-device-registration-order.patch
new file mode 100644 (file)
index 0000000..69bd9db
--- /dev/null
@@ -0,0 +1,75 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Florian Fainelli <f.fainelli@gmail.com>
+Date: Mon, 25 Sep 2017 15:55:53 -0700
+Subject: net: dsa: Fix network device registration order
+
+From: Florian Fainelli <f.fainelli@gmail.com>
+
+
+[ Upstream commit e804441cfe0b60f6c430901946a69c01eac09df1 ]
+
+We cannot be registering the network device first, then setting its
+carrier off and finally connecting it to a PHY, doing that leaves a
+window during which the carrier is at best inconsistent, and at worst
+the device is not usable without a down/up sequence since the network
+device is visible to user space with possibly no PHY device attached.
+
+Re-order steps so that they make logical sense. This fixes some devices
+where the port was not usable after e.g: an unbind then bind of the
+driver.
+
+Fixes: 0071f56e46da ("dsa: Register netdev before phy")
+Fixes: 91da11f870f0 ("net: Distributed Switch Architecture protocol support")
+Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/dsa/slave.c |   28 +++++++++++++++++-----------
+ 1 file changed, 17 insertions(+), 11 deletions(-)
+
+--- a/net/dsa/slave.c
++++ b/net/dsa/slave.c
+@@ -1269,26 +1269,32 @@ int dsa_slave_create(struct dsa_switch *
+       p->old_duplex = -1;
+       ds->ports[port].netdev = slave_dev;
+-      ret = register_netdev(slave_dev);
+-      if (ret) {
+-              netdev_err(master, "error %d registering interface %s\n",
+-                         ret, slave_dev->name);
+-              ds->ports[port].netdev = NULL;
+-              free_netdev(slave_dev);
+-              return ret;
+-      }
+       netif_carrier_off(slave_dev);
+       ret = dsa_slave_phy_setup(p, slave_dev);
+       if (ret) {
+               netdev_err(master, "error %d setting up slave phy\n", ret);
+-              unregister_netdev(slave_dev);
+-              free_netdev(slave_dev);
+-              return ret;
++              goto out_free;
++      }
++
++      ret = register_netdev(slave_dev);
++      if (ret) {
++              netdev_err(master, "error %d registering interface %s\n",
++                         ret, slave_dev->name);
++              goto out_phy;
+       }
+       return 0;
++
++out_phy:
++      phy_disconnect(p->phy);
++      if (of_phy_is_fixed_link(ds->ports[port].dn))
++              of_phy_deregister_fixed_link(ds->ports[port].dn);
++out_free:
++      free_netdev(slave_dev);
++      ds->ports[port].netdev = NULL;
++      return ret;
+ }
+ void dsa_slave_destroy(struct net_device *slave_dev)
diff --git a/queue-4.9/net-emac-fix-napi-poll-list-corruption.patch b/queue-4.9/net-emac-fix-napi-poll-list-corruption.patch
new file mode 100644 (file)
index 0000000..aa37b7d
--- /dev/null
@@ -0,0 +1,54 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Christian Lamparter <chunkeey@googlemail.com>
+Date: Tue, 19 Sep 2017 19:35:18 +0200
+Subject: net: emac: Fix napi poll list corruption
+
+From: Christian Lamparter <chunkeey@googlemail.com>
+
+
+[ Upstream commit f55956065ec94e3e9371463d693a1029c4cc3007 ]
+
+This patch is pretty much a carbon copy of
+commit 3079c652141f ("caif: Fix napi poll list corruption")
+with "caif" replaced by "emac".
+
+The commit d75b1ade567f ("net: less interrupt masking in NAPI")
+breaks emac.
+
+It is now required that if the entire budget is consumed when poll
+returns, the napi poll_list must remain empty.  However, like some
+other drivers emac tries to do a last-ditch check and if there is
+more work it will call napi_reschedule and then immediately process
+some of this new work.  Should the entire budget be consumed while
+processing such new work then we will violate the new caller
+contract.
+
+This patch fixes this by not touching any work when we reschedule
+in emac.
+
+Signed-off-by: Christian Lamparter <chunkeey@googlemail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/ibm/emac/mal.c |    3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+--- a/drivers/net/ethernet/ibm/emac/mal.c
++++ b/drivers/net/ethernet/ibm/emac/mal.c
+@@ -402,7 +402,7 @@ static int mal_poll(struct napi_struct *
+       unsigned long flags;
+       MAL_DBG2(mal, "poll(%d)" NL, budget);
+- again:
++
+       /* Process TX skbs */
+       list_for_each(l, &mal->poll_list) {
+               struct mal_commac *mc =
+@@ -451,7 +451,6 @@ static int mal_poll(struct napi_struct *
+                       spin_lock_irqsave(&mal->lock, flags);
+                       mal_disable_eob_irq(mal);
+                       spin_unlock_irqrestore(&mal->lock, flags);
+-                      goto again;
+               }
+               mc->ops->poll_tx(mc->dev);
+       }
diff --git a/queue-4.9/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch b/queue-4.9/net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch
new file mode 100644 (file)
index 0000000..0e69cd3
--- /dev/null
@@ -0,0 +1,39 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+Date: Fri, 15 Sep 2017 12:01:58 +0530
+Subject: net: phy: Fix mask value write on gmii2rgmii converter speed register
+
+From: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+
+
+[ Upstream commit f2654a4781318dc7ab8d6cde66f1fa39eab980a9 ]
+
+To clear the Speed Selection in the MDIO control register (0x10),
+i.e. clear bits 6 and 13 to zero while keeping the other bits the same,
+the mask value has to be inverted with the bitwise NOT operator (~)
+before the AND operation is applied.
+
+This patch clears current speed selection before writing the
+new speed settings to gmii2rgmii converter
+
+Fixes: f411a6160bd4 ("net: phy: Add gmiitorgmii converter support")
+
+Signed-off-by: Fahad Kunnathadi <fahad.kunnathadi@dexceldesigns.com>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/phy/xilinx_gmii2rgmii.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/drivers/net/phy/xilinx_gmii2rgmii.c
++++ b/drivers/net/phy/xilinx_gmii2rgmii.c
+@@ -44,7 +44,7 @@ static int xgmiitorgmii_read_status(stru
+       priv->phy_drv->read_status(phydev);
+       val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG);
+-      val &= XILINX_GMII2RGMII_SPEED_MASK;
++      val &= ~XILINX_GMII2RGMII_SPEED_MASK;
+       if (phydev->speed == SPEED_1000)
+               val |= BMCR_SPEED1000;
diff --git a/queue-4.9/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch b/queue-4.9/net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch
new file mode 100644 (file)
index 0000000..089a43f
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Timur Tabi <timur@codeaurora.org>
+Date: Fri, 22 Sep 2017 15:32:44 -0500
+Subject: net: qcom/emac: specify the correct size when mapping a DMA buffer
+
+From: Timur Tabi <timur@codeaurora.org>
+
+
+[ Upstream commit a93ad944f4ff9a797abff17c73fc4b1e4a1d9141 ]
+
+When mapping the RX DMA buffers, the driver was accidentally specifying
+zero for the buffer length.  Under normal circumstances, SWIOTLB does not
+need to allocate a bounce buffer, so the address is just mapped without
+checking the size field.  This is why the error was not detected earlier.
+
+Fixes: b9b17debc69d ("net: emac: emac gigabit ethernet controller driver")
+Cc: stable@vger.kernel.org
+Signed-off-by: Timur Tabi <timur@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/ethernet/qualcomm/emac/emac-mac.c |    3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
++++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+@@ -932,7 +932,8 @@ static void emac_mac_rx_descs_refill(str
+               curr_rxbuf->dma_addr =
+                       dma_map_single(adpt->netdev->dev.parent, skb->data,
+-                                     curr_rxbuf->length, DMA_FROM_DEVICE);
++                                     adpt->rxbuf_size, DMA_FROM_DEVICE);
++
+               ret = dma_mapping_error(adpt->netdev->dev.parent,
+                                       curr_rxbuf->dma_addr);
+               if (ret) {
diff --git a/queue-4.9/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch b/queue-4.9/net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch
new file mode 100644 (file)
index 0000000..c5b9fe5
--- /dev/null
@@ -0,0 +1,37 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Date: Tue, 3 Oct 2017 13:20:48 +0300
+Subject: net: rtnetlink: fix info leak in RTM_GETSTATS call
+
+From: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+
+
+[ Upstream commit ce024f42c2e28b6bce4ecc1e891b42f57f753892 ]
+
+When RTM_GETSTATS was added the fields of its header struct were not all
+initialized when returning the result thus leaking 4 bytes of information
+to user-space per rtnl_fill_statsinfo call, so initialize them now. Thanks
+to Alexander Potapenko for the detailed report and bisection.
+
+Reported-by: Alexander Potapenko <glider@google.com>
+Fixes: 10c9ead9f3c6 ("rtnetlink: add new RTM_GETSTATS message to dump link stats")
+Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
+Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/rtnetlink.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/net/core/rtnetlink.c
++++ b/net/core/rtnetlink.c
+@@ -3758,6 +3758,9 @@ static int rtnl_fill_statsinfo(struct sk
+               return -EMSGSIZE;
+       ifsm = nlmsg_data(nlh);
++      ifsm->family = PF_UNSPEC;
++      ifsm->pad1 = 0;
++      ifsm->pad2 = 0;
+       ifsm->ifindex = dev->ifindex;
+       ifsm->filter_mask = filter_mask;
diff --git a/queue-4.9/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch b/queue-4.9/net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch
new file mode 100644 (file)
index 0000000..8d462a5
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Davide Caratti <dcaratti@redhat.com>
+Date: Sat, 16 Sep 2017 14:02:21 +0200
+Subject: net/sched: cls_matchall: fix crash when used with classful qdisc
+
+From: Davide Caratti <dcaratti@redhat.com>
+
+
+[ Upstream commit 3ff4cbec87da48b0ec1f7b6196607b034de0c680 ]
+
+this script, edited from Linux Advanced Routing and Traffic Control guide
+
+tc q a dev en0 root handle 1: htb default a
+tc c a dev en0 parent 1:  classid 1:1 htb rate 6mbit burst 15k
+tc c a dev en0 parent 1:1 classid 1:a htb rate 5mbit ceil 6mbit burst 15k
+tc c a dev en0 parent 1:1 classid 1:b htb rate 1mbit ceil 6mbit burst 15k
+tc f a dev en0 parent 1:0 prio 1 $clsname $clsargs classid 1:b
+ping $address -c1
+tc -s c s dev en0
+
+classifies traffic to 1:b or 1:a, depending on whether or not the packet
+matches the pattern $clsargs of filter $clsname. However, when $clsname is
+'matchall', a systematic crash can be observed in htb_classify(). HTB and
+classful qdiscs don't assign initial value to struct tcf_result, but then
+they expect it to contain valid values after filters have been run. Thus,
+current 'matchall' ignores the TCA_MATCHALL_CLASSID attribute, configured
+by user, and makes HTB (and classful qdiscs) dereference random pointers.
+
+By assigning head->res to *res in mall_classify(), before the actions are
+invoked, we fix this crash and enable TCA_MATCHALL_CLASSID functionality,
+that had no effect on 'matchall' classifier since its first introduction.
+
+BugLink: https://bugzilla.redhat.com/show_bug.cgi?id=1460213
+Reported-by: Jiri Benc <jbenc@redhat.com>
+Fixes: b87f7936a932 ("net/sched: introduce Match-all classifier")
+Signed-off-by: Davide Caratti <dcaratti@redhat.com>
+Acked-by: Yotam Gigi <yotamg@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/cls_matchall.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/cls_matchall.c
++++ b/net/sched/cls_matchall.c
+@@ -32,6 +32,7 @@ static int mall_classify(struct sk_buff
+       if (tc_skip_sw(head->flags))
+               return -1;
++      *res = head->res;
+       return tcf_exts_exec(skb, &head->exts, res);
+ }
diff --git a/queue-4.9/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch b/queue-4.9/net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch
new file mode 100644 (file)
index 0000000..5e75bb5
--- /dev/null
@@ -0,0 +1,60 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Jiri Pirko <jiri@mellanox.com>
+Date: Wed, 13 Sep 2017 17:32:37 +0200
+Subject: net: sched: fix use-after-free in tcf_action_destroy and tcf_del_walker
+
+From: Jiri Pirko <jiri@mellanox.com>
+
+
+[ Upstream commit 255cd50f207ae8ec7b22663246c833407744e634 ]
+
+Recent commit d7fb60b9cafb ("net_sched: get rid of tcfa_rcu") removed
+freeing in call_rcu, which changed an already existing hard-to-hit
+race condition into one that is hit 100% of the time:
+
+[  598.599825] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
+[  598.607782] IP: tcf_action_destroy+0xc0/0x140
+
+Or:
+
+[   40.858924] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
+[   40.862840] IP: tcf_generic_walker+0x534/0x820
+
+Fix this by storing the ops and using them directly for the module_put call.
+
+Fixes: a85a970af265 ("net_sched: move tc_action into tcf_common")
+Signed-off-by: Jiri Pirko <jiri@mellanox.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/act_api.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/sched/act_api.c
++++ b/net/sched/act_api.c
+@@ -141,7 +141,7 @@ static int tcf_del_walker(struct tcf_has
+               hlist_for_each_entry_safe(p, n, head, tcfa_head) {
+                       ret = __tcf_hash_release(p, false, true);
+                       if (ret == ACT_P_DELETED) {
+-                              module_put(p->ops->owner);
++                              module_put(ops->owner);
+                               n_i++;
+                       } else if (ret < 0)
+                               goto nla_put_failure;
+@@ -450,13 +450,15 @@ EXPORT_SYMBOL(tcf_action_exec);
+ int tcf_action_destroy(struct list_head *actions, int bind)
+ {
++      const struct tc_action_ops *ops;
+       struct tc_action *a, *tmp;
+       int ret = 0;
+       list_for_each_entry_safe(a, tmp, actions, list) {
++              ops = a->ops;
+               ret = __tcf_hash_release(a, bind, true);
+               if (ret == ACT_P_DELETED)
+-                      module_put(a->ops->owner);
++                      module_put(ops->owner);
+               else if (ret < 0)
+                       return ret;
+       }
diff --git a/queue-4.9/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch b/queue-4.9/net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch
new file mode 100644 (file)
index 0000000..88c74cc
--- /dev/null
@@ -0,0 +1,107 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Christoph Paasch <cpaasch@apple.com>
+Date: Tue, 26 Sep 2017 17:38:50 -0700
+Subject: net: Set sk_prot_creator when cloning sockets to the right proto
+
+From: Christoph Paasch <cpaasch@apple.com>
+
+
+[ Upstream commit 9d538fa60bad4f7b23193c89e843797a1cf71ef3 ]
+
+sk->sk_prot and sk->sk_prot_creator can differ when the app uses
+IPV6_ADDRFORM (transforming an IPv6-socket to an IPv4-one).
+Which is why sk_prot_creator is there to make sure that sk_prot_free()
+does the kmem_cache_free() on the right kmem_cache slab.
+
+Now, if such a socket gets transformed back to a listening socket (using
+connect() with AF_UNSPEC) we will allocate an IPv4 tcp_sock through
+sk_clone_lock() when a new connection comes in. But sk_prot_creator will
+still point to the IPv6 kmem_cache (as everything got copied in
+sk_clone_lock()). When freeing, we will thus put this
+memory back into the IPv6 kmem_cache although it was allocated in the
+IPv4 cache. I have seen memory corruption happening because of this.
+
+With slub-debugging and MEMCG_KMEM enabled this gives the warning
+       "cache_from_obj: Wrong slab cache. TCPv6 but object is from TCP"
+
+A C-program to trigger this:
+
+void main(void)
+{
+        int fd = socket(AF_INET6, SOCK_STREAM, IPPROTO_TCP);
+        int new_fd, newest_fd, client_fd;
+        struct sockaddr_in6 bind_addr;
+        struct sockaddr_in bind_addr4, client_addr1, client_addr2;
+        struct sockaddr unsp;
+        int val;
+
+        memset(&bind_addr, 0, sizeof(bind_addr));
+        bind_addr.sin6_family = AF_INET6;
+        bind_addr.sin6_port = ntohs(42424);
+
+        memset(&client_addr1, 0, sizeof(client_addr1));
+        client_addr1.sin_family = AF_INET;
+        client_addr1.sin_port = ntohs(42424);
+        client_addr1.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+        memset(&client_addr2, 0, sizeof(client_addr2));
+        client_addr2.sin_family = AF_INET;
+        client_addr2.sin_port = ntohs(42421);
+        client_addr2.sin_addr.s_addr = inet_addr("127.0.0.1");
+
+        memset(&unsp, 0, sizeof(unsp));
+        unsp.sa_family = AF_UNSPEC;
+
+        bind(fd, (struct sockaddr *)&bind_addr, sizeof(bind_addr));
+
+        listen(fd, 5);
+
+        client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+        connect(client_fd, (struct sockaddr *)&client_addr1, sizeof(client_addr1));
+        new_fd = accept(fd, NULL, NULL);
+        close(fd);
+
+        val = AF_INET;
+        setsockopt(new_fd, SOL_IPV6, IPV6_ADDRFORM, &val, sizeof(val));
+
+        connect(new_fd, &unsp, sizeof(unsp));
+
+        memset(&bind_addr4, 0, sizeof(bind_addr4));
+        bind_addr4.sin_family = AF_INET;
+        bind_addr4.sin_port = ntohs(42421);
+        bind(new_fd, (struct sockaddr *)&bind_addr4, sizeof(bind_addr4));
+
+        listen(new_fd, 5);
+
+        client_fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+        connect(client_fd, (struct sockaddr *)&client_addr2, sizeof(client_addr2));
+
+        newest_fd = accept(new_fd, NULL, NULL);
+        close(new_fd);
+
+        close(client_fd);
+        close(new_fd);
+}
+
+As far as I can see, this bug has been there since the beginning of the
+git-days.
+
+Signed-off-by: Christoph Paasch <cpaasch@apple.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/sock.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1493,6 +1493,8 @@ struct sock *sk_clone_lock(const struct
+               sock_copy(newsk, sk);
++              newsk->sk_prot_creator = sk->sk_prot;
++
+               /* SANITY */
+               if (likely(newsk->sk_net_refcnt))
+                       get_net(sock_net(newsk));
diff --git a/queue-4.9/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch b/queue-4.9/net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch
new file mode 100644 (file)
index 0000000..766bfbd
--- /dev/null
@@ -0,0 +1,33 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Date: Wed, 20 Sep 2017 15:45:36 +0300
+Subject: net_sched: always reset qdisc backlog in qdisc_reset()
+
+From: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+
+
+[ Upstream commit c8e1812960eeae42e2183154927028511c4bc566 ]
+
+The SKB stored in qdisc->gso_skb is also counted in the backlog.
+
+Some qdiscs don't reset the backlog to zero in ->reset();
+for example, sfq just dequeues and frees all queued skbs.
+
+Signed-off-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
+Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too")
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_generic.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -681,6 +681,7 @@ void qdisc_reset(struct Qdisc *qdisc)
+               qdisc->gso_skb = NULL;
+       }
+       qdisc->q.qlen = 0;
++      qdisc->qstats.backlog = 0;
+ }
+ EXPORT_SYMBOL(qdisc_reset);
diff --git a/queue-4.9/netlink-do-not-proceed-if-dump-s-start-errs.patch b/queue-4.9/netlink-do-not-proceed-if-dump-s-start-errs.patch
new file mode 100644 (file)
index 0000000..730a6b3
--- /dev/null
@@ -0,0 +1,49 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+Date: Thu, 28 Sep 2017 00:41:44 +0200
+Subject: netlink: do not proceed if dump's start() errs
+
+From: "Jason A. Donenfeld" <Jason@zx2c4.com>
+
+
+[ Upstream commit fef0035c0f31322d417d1954bba5ab959bf91183 ]
+
+Drivers that use the start method for netlink dumping rely on dumpit not
+being called if start fails. For example, ila_xlat.c allocates memory
+and assigns it to cb->args[0] in its start() function. It might fail to
+do that and return -ENOMEM instead. However, even when returning an
+error, dumpit will be called, which, in the example above, quickly
+dereferences the memory in cb->args[0], which will OOPS the kernel. This
+is but one example of how this goes wrong.
+
+Since start() has always been a function with an int return type, it
+therefore makes sense to use it properly, rather than ignoring it. This
+patch thus returns early and does not call dumpit() when start() fails.
+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/netlink/af_netlink.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/net/netlink/af_netlink.c
++++ b/net/netlink/af_netlink.c
+@@ -2211,10 +2211,13 @@ int __netlink_dump_start(struct sock *ss
+       mutex_unlock(nlk->cb_mutex);
++      ret = 0;
+       if (cb->start)
+-              cb->start(cb);
++              ret = cb->start(cb);
++
++      if (!ret)
++              ret = netlink_dump(sk);
+-      ret = netlink_dump(sk);
+       sock_put(sk);
+       if (ret)
diff --git a/queue-4.9/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch b/queue-4.9/packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch
new file mode 100644 (file)
index 0000000..aa54c3e
--- /dev/null
@@ -0,0 +1,74 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Thu, 14 Sep 2017 17:14:41 -0400
+Subject: packet: hold bind lock when rebinding to fanout hook
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 008ba2a13f2d04c947adc536d19debb8fe66f110 ]
+
+Packet socket bind operations must hold the po->bind_lock. This keeps
+po->running consistent with whether the socket is actually on a ptype
+list to receive packets.
+
+fanout_add unbinds a socket and its packet_rcv/tpacket_rcv call, then
+binds the fanout object to receive through packet_rcv_fanout.
+
+Make it hold the po->bind_lock when testing po->running and rebinding.
+Else, it can race with other rebind operations, such as that in
+packet_set_ring from packet_rcv to tpacket_rcv. Concurrent updates
+can result in a socket being added to a fanout group twice, causing
+use-after-free KASAN bug reports, among others.
+
+Reported independently by both trinity and syzkaller.
+Verified that the syzkaller reproducer passes after this patch.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Reported-by: nixioaming <nixiaoming@huawei.com>
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |   16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1648,10 +1648,6 @@ static int fanout_add(struct sock *sk, u
+       mutex_lock(&fanout_mutex);
+-      err = -EINVAL;
+-      if (!po->running)
+-              goto out;
+-
+       err = -EALREADY;
+       if (po->fanout)
+               goto out;
+@@ -1700,7 +1696,10 @@ static int fanout_add(struct sock *sk, u
+               list_add(&match->list, &fanout_list);
+       }
+       err = -EINVAL;
+-      if (match->type == type &&
++
++      spin_lock(&po->bind_lock);
++      if (po->running &&
++          match->type == type &&
+           match->prot_hook.type == po->prot_hook.type &&
+           match->prot_hook.dev == po->prot_hook.dev) {
+               err = -ENOSPC;
+@@ -1712,6 +1711,13 @@ static int fanout_add(struct sock *sk, u
+                       err = 0;
+               }
+       }
++      spin_unlock(&po->bind_lock);
++
++      if (err && !refcount_read(&match->sk_ref)) {
++              list_del(&match->list);
++              kfree(match);
++      }
++
+ out:
+       if (err && rollover) {
+               kfree(rollover);
diff --git a/queue-4.9/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch b/queue-4.9/packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch
new file mode 100644 (file)
index 0000000..d802fce
--- /dev/null
@@ -0,0 +1,53 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 26 Sep 2017 12:19:37 -0400
+Subject: packet: in packet_do_bind, test fanout with bind_lock held
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit 4971613c1639d8e5f102c4e797c3bf8f83a5a69e ]
+
+Once a socket has po->fanout set, it remains a member of the group
+until it is destroyed. The prot_hook must be constant and identical
+across sockets in the group.
+
+If fanout_add races with packet_do_bind between the test of po->fanout
+and taking the lock, the bind call may make type or dev inconsistent
+with that of the fanout group.
+
+Hold po->bind_lock when testing po->fanout to avoid this race.
+
+I had to introduce artificial delay (local_bh_enable) to actually
+observe the race.
+
+Fixes: dc99f600698d ("packet: Add fanout support.")
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -3069,13 +3069,15 @@ static int packet_do_bind(struct sock *s
+       int ret = 0;
+       bool unlisted = false;
+-      if (po->fanout)
+-              return -EINVAL;
+-
+       lock_sock(sk);
+       spin_lock(&po->bind_lock);
+       rcu_read_lock();
++      if (po->fanout) {
++              ret = -EINVAL;
++              goto out_unlock;
++      }
++
+       if (name) {
+               dev = dev_get_by_name_rcu(sock_net(sk), name);
+               if (!dev) {
diff --git a/queue-4.9/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch b/queue-4.9/packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch
new file mode 100644 (file)
index 0000000..7dc9ce0
--- /dev/null
@@ -0,0 +1,62 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Willem de Bruijn <willemb@google.com>
+Date: Tue, 26 Sep 2017 12:20:17 -0400
+Subject: packet: only test po->has_vnet_hdr once in packet_snd
+
+From: Willem de Bruijn <willemb@google.com>
+
+
+[ Upstream commit da7c9561015e93d10fe6aab73e9288e0d09d65a6 ]
+
+Packet socket option po->has_vnet_hdr can be updated concurrently with
+other operations if no ring is attached.
+
+Do not test the option twice in packet_snd, as the value may change in
+between calls. A race on setsockopt disable may cause a packet > mtu
+to be sent without having GSO options set.
+
+Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.")
+Signed-off-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/packet/af_packet.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/packet/af_packet.c
++++ b/net/packet/af_packet.c
+@@ -1713,7 +1713,7 @@ static int fanout_add(struct sock *sk, u
+       }
+       spin_unlock(&po->bind_lock);
+-      if (err && !refcount_read(&match->sk_ref)) {
++      if (err && !atomic_read(&match->sk_ref)) {
+               list_del(&match->list);
+               kfree(match);
+       }
+@@ -2838,6 +2838,7 @@ static int packet_snd(struct socket *soc
+       struct virtio_net_hdr vnet_hdr = { 0 };
+       int offset = 0;
+       struct packet_sock *po = pkt_sk(sk);
++      bool has_vnet_hdr = false;
+       int hlen, tlen, linear;
+       int extra_len = 0;
+@@ -2881,6 +2882,7 @@ static int packet_snd(struct socket *soc
+               err = packet_snd_vnet_parse(msg, &len, &vnet_hdr);
+               if (err)
+                       goto out_unlock;
++              has_vnet_hdr = true;
+       }
+       if (unlikely(sock_flag(sk, SOCK_NOFCS))) {
+@@ -2941,7 +2943,7 @@ static int packet_snd(struct socket *soc
+       packet_pick_tx_queue(dev, skb);
+-      if (po->has_vnet_hdr) {
++      if (has_vnet_hdr) {
+               err = packet_snd_vnet_gso(skb, &vnet_hdr);
+               if (err)
+                       goto out_free;
diff --git a/queue-4.9/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch b/queue-4.9/sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch
new file mode 100644 (file)
index 0000000..6b1b4ec
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Dan Carpenter <dan.carpenter@oracle.com>
+Date: Thu, 14 Sep 2017 02:00:54 +0300
+Subject: sctp: potential read out of bounds in sctp_ulpevent_type_enabled()
+
+From: Dan Carpenter <dan.carpenter@oracle.com>
+
+
+[ Upstream commit fa5f7b51fc3080c2b195fa87c7eca7c05e56f673 ]
+
+This code causes a static checker warning because Smatch doesn't trust
+anything that comes from skb->data.  I've reviewed this code and I do
+think skb->data can be controlled by the user here.
+
+The sctp_event_subscribe struct has 13 __u8 fields and we want to see
+if ours is non-zero.  sn_type can be any value in the 0-USHRT_MAX range.
+We're subtracting SCTP_SN_TYPE_BASE which is 1 << 15 so we could read
+either before the start of the struct or after the end.
+
+This is a very old bug and it's surprising that it would go undetected
+for so long but my theory is that it just doesn't have a big impact so
+it would be hard to notice.
+
+Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/sctp/ulpevent.h |    6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+--- a/include/net/sctp/ulpevent.h
++++ b/include/net/sctp/ulpevent.h
+@@ -141,8 +141,12 @@ __u16 sctp_ulpevent_get_notification_typ
+ static inline int sctp_ulpevent_type_enabled(__u16 sn_type,
+                                            struct sctp_event_subscribe *mask)
+ {
++      int offset = sn_type - SCTP_SN_TYPE_BASE;
+       char *amask = (char *) mask;
+-      return amask[sn_type - SCTP_SN_TYPE_BASE];
++
++      if (offset >= sizeof(struct sctp_event_subscribe))
++              return 0;
++      return amask[offset];
+ }
+ /* Given an event subscription, is this event enabled? */
index 10a48b1642fc24267041b6d2a00d9a921c103fe5..ea36426cbe5a99271cecf7d6d9ae40df52f7a7cb 100644 (file)
@@ -43,3 +43,35 @@ lsm-fix-smack_inode_removexattr-and-xattr_getsecurity-memleak.patch
 alsa-compress-remove-unused-variable.patch
 revert-alsa-echoaudio-purge-contradictions-between-dimension-matrix-members-and-total-number-of-members.patch
 alsa-usx2y-suppress-kernel-warning-at-page-allocation-failures.patch
+mlxsw-spectrum-prevent-mirred-related-crash-on-removal.patch
+net-sched-fix-use-after-free-in-tcf_action_destroy-and-tcf_del_walker.patch
+sctp-potential-read-out-of-bounds-in-sctp_ulpevent_type_enabled.patch
+tcp-update-skb-skb_mstamp-more-carefully.patch
+bpf-verifier-reject-bpf_alu64-bpf_end.patch
+tcp-fix-data-delivery-rate.patch
+udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch
+ip6_gre-skb_push-ipv6hdr-before-packing-the-header-in-ip6gre_header.patch
+net-phy-fix-mask-value-write-on-gmii2rgmii-converter-speed-register.patch
+ip6_tunnel-do-not-allow-loading-ip6_tunnel-if-ipv6-is-disabled-in-cmdline.patch
+net-sched-cls_matchall-fix-crash-when-used-with-classful-qdisc.patch
+tcp-fastopen-fix-on-syn-data-transmit-failure.patch
+net-emac-fix-napi-poll-list-corruption.patch
+packet-hold-bind-lock-when-rebinding-to-fanout-hook.patch
+bpf-one-perf-event-close-won-t-free-bpf-program-attached-by-another-perf-event.patch
+isdn-i4l-fetch-the-ppp_write-buffer-in-one-shot.patch
+net_sched-always-reset-qdisc-backlog-in-qdisc_reset.patch
+net-qcom-emac-specify-the-correct-size-when-mapping-a-dma-buffer.patch
+vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch
+l2tp-avoid-schedule-while-atomic-in-exit_net.patch
+l2tp-fix-race-condition-in-l2tp_tunnel_delete.patch
+tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch
+net-dsa-fix-network-device-registration-order.patch
+packet-in-packet_do_bind-test-fanout-with-bind_lock-held.patch
+packet-only-test-po-has_vnet_hdr-once-in-packet_snd.patch
+net-set-sk_prot_creator-when-cloning-sockets-to-the-right-proto.patch
+netlink-do-not-proceed-if-dump-s-start-errs.patch
+ip6_gre-ip6gre_tap-device-should-keep-dst.patch
+ip6_tunnel-update-mtu-properly-for-arphrd_ether-tunnel-device-in-tx-path.patch
+tipc-use-only-positive-error-codes-in-messages.patch
+net-rtnetlink-fix-info-leak-in-rtm_getstats-call.patch
+socket-bpf-fix-possible-use-after-free.patch
diff --git a/queue-4.9/socket-bpf-fix-possible-use-after-free.patch b/queue-4.9/socket-bpf-fix-possible-use-after-free.patch
new file mode 100644 (file)
index 0000000..753ee62
--- /dev/null
@@ -0,0 +1,86 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Mon, 2 Oct 2017 12:20:51 -0700
+Subject: socket, bpf: fix possible use after free
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit eefca20eb20c66b06cf5ed09b49b1a7caaa27b7b ]
+
+Starting from linux-4.4, 3WHS no longer takes the listener lock.
+
+Since then, we might hit a use-after-free in sk_filter_charge(),
+if the filter we got in the memcpy() of the listener content
+just happened to be replaced by a thread changing listener BPF filter.
+
+To fix this, we need to make sure the filter refcount is not already
+zero before incrementing it again.
+
+Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Alexei Starovoitov <ast@kernel.org>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/core/filter.c |   15 +++++++++++++--
+ net/core/sock.c   |    5 ++++-
+ 2 files changed, 17 insertions(+), 3 deletions(-)
+
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -937,20 +937,31 @@ void sk_filter_uncharge(struct sock *sk,
+ /* try to charge the socket memory if there is space available
+  * return true on success
+  */
+-bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
++static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp)
+ {
+       u32 filter_size = bpf_prog_size(fp->prog->len);
+       /* same check as in sock_kmalloc() */
+       if (filter_size <= sysctl_optmem_max &&
+           atomic_read(&sk->sk_omem_alloc) + filter_size < sysctl_optmem_max) {
+-              atomic_inc(&fp->refcnt);
+               atomic_add(filter_size, &sk->sk_omem_alloc);
+               return true;
+       }
+       return false;
+ }
++bool sk_filter_charge(struct sock *sk, struct sk_filter *fp)
++{
++      if (!atomic_inc_not_zero(&fp->refcnt))
++              return false;
++
++      if (!__sk_filter_charge(sk, fp)) {
++              sk_filter_release(fp);
++              return false;
++      }
++      return true;
++}
++
+ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp)
+ {
+       struct sock_filter *old_prog;
+--- a/net/core/sock.c
++++ b/net/core/sock.c
+@@ -1528,13 +1528,16 @@ struct sock *sk_clone_lock(const struct
+               sock_reset_flag(newsk, SOCK_DONE);
+               skb_queue_head_init(&newsk->sk_error_queue);
+-              filter = rcu_dereference_protected(newsk->sk_filter, 1);
++              rcu_read_lock();
++              filter = rcu_dereference(sk->sk_filter);
+               if (filter != NULL)
+                       /* though it's an empty new sock, the charging may fail
+                        * if sysctl_optmem_max was changed between creation of
+                        * original socket and cloning
+                        */
+                       is_charged = sk_filter_charge(newsk, filter);
++              RCU_INIT_POINTER(newsk->sk_filter, filter);
++              rcu_read_unlock();
+               if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) {
+                       /* We need to make sure that we don't uncharge the new
diff --git a/queue-4.9/tcp-fastopen-fix-on-syn-data-transmit-failure.patch b/queue-4.9/tcp-fastopen-fix-on-syn-data-transmit-failure.patch
new file mode 100644 (file)
index 0000000..1ef75cf
--- /dev/null
@@ -0,0 +1,97 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Tue, 19 Sep 2017 10:05:57 -0700
+Subject: tcp: fastopen: fix on syn-data transmit failure
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit b5b7db8d680464b1d631fd016f5e093419f0bfd9 ]
+
+Our recent change exposed a bug in TCP Fastopen Client that syzkaller
+found right away [1]
+
+When we prepare skb with SYN+DATA, we attempt to transmit it,
+and we update socket state as if the transmit was a success.
+
+In socket RTX queue we have two skbs, one with the SYN alone,
+and a second one containing the DATA.
+
+When (malicious) ACK comes in, we now complain that second one had no
+skb_mstamp.
+
+The proper fix is to make sure that if the transmit failed, we do not
+pretend we sent the DATA skb, and make it our send_head.
+
+When 3WHS completes, we can now send the DATA right away, without having
+to wait for a timeout.
+
+[1]
+WARNING: CPU: 0 PID: 100189 at net/ipv4/tcp_input.c:3117 tcp_clean_rtx_queue+0x2057/0x2ab0 net/ipv4/tcp_input.c:3117()
+
+ WARN_ON_ONCE(last_ackt == 0);
+
+Modules linked in:
+CPU: 0 PID: 100189 Comm: syz-executor1 Not tainted
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
+ 0000000000000000 ffff8800b35cb1d8 ffffffff81cad00d 0000000000000000
+ ffffffff828a4347 ffff88009f86c080 ffffffff8316eb20 0000000000000d7f
+ ffff8800b35cb220 ffffffff812c33c2 ffff8800baad2440 00000009d46575c0
+Call Trace:
+ [<ffffffff81cad00d>] __dump_stack
+ [<ffffffff81cad00d>] dump_stack+0xc1/0x124
+ [<ffffffff812c33c2>] warn_slowpath_common+0xe2/0x150
+ [<ffffffff812c361e>] warn_slowpath_null+0x2e/0x40
+ [<ffffffff828a4347>] tcp_clean_rtx_queue+0x2057/0x2ab0 n
+ [<ffffffff828ae6fd>] tcp_ack+0x151d/0x3930
+ [<ffffffff828baa09>] tcp_rcv_state_process+0x1c69/0x4fd0
+ [<ffffffff828efb7f>] tcp_v4_do_rcv+0x54f/0x7c0
+ [<ffffffff8258aacb>] sk_backlog_rcv
+ [<ffffffff8258aacb>] __release_sock+0x12b/0x3a0
+ [<ffffffff8258ad9e>] release_sock+0x5e/0x1c0
+ [<ffffffff8294a785>] inet_wait_for_connect
+ [<ffffffff8294a785>] __inet_stream_connect+0x545/0xc50
+ [<ffffffff82886f08>] tcp_sendmsg_fastopen
+ [<ffffffff82886f08>] tcp_sendmsg+0x2298/0x35a0
+ [<ffffffff82952515>] inet_sendmsg+0xe5/0x520
+ [<ffffffff8257152f>] sock_sendmsg_nosec
+ [<ffffffff8257152f>] sock_sendmsg+0xcf/0x110
+
+Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
+Fixes: 783237e8daf1 ("net-tcp: Fast Open client - sending SYN-data")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: Dmitry Vyukov <dvyukov@google.com>
+Cc: Neal Cardwell <ncardwell@google.com>
+Cc: Yuchung Cheng <ycheng@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3329,6 +3329,10 @@ static int tcp_send_syn_data(struct sock
+               goto done;
+       }
++      /* data was not sent, this is our new send_head */
++      sk->sk_send_head = syn_data;
++      tp->packets_out -= tcp_skb_pcount(syn_data);
++
+ fallback:
+       /* Send a regular SYN with Fast Open cookie request option */
+       if (fo->cookie.len > 0)
+@@ -3378,6 +3382,11 @@ int tcp_connect(struct sock *sk)
+        */
+       tp->snd_nxt = tp->write_seq;
+       tp->pushed_seq = tp->write_seq;
++      buff = tcp_send_head(sk);
++      if (unlikely(buff)) {
++              tp->snd_nxt     = TCP_SKB_CB(buff)->seq;
++              tp->pushed_seq  = TCP_SKB_CB(buff)->seq;
++      }
+       TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS);
+       /* Timer for repeating the SYN until an answer. */
diff --git a/queue-4.9/tcp-fix-data-delivery-rate.patch b/queue-4.9/tcp-fix-data-delivery-rate.patch
new file mode 100644 (file)
index 0000000..11e2f6f
--- /dev/null
@@ -0,0 +1,46 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Fri, 15 Sep 2017 16:47:42 -0700
+Subject: tcp: fix data delivery rate
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit fc22579917eb7e13433448a342f1cb1592920940 ]
+
+Now that skb->skb_mstamp is updated later, we also need to call
+tcp_rate_skb_sent() after the update is done.
+
+Fixes: 8c72c65b426b ("tcp: update skb->skb_mstamp more carefully")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -925,8 +925,6 @@ static int tcp_transmit_skb(struct sock
+       if (clone_it) {
+               TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+                       - tp->snd_una;
+-              tcp_rate_skb_sent(sk, skb);
+-
+               oskb = skb;
+               if (unlikely(skb_cloned(skb)))
+                       skb = pskb_copy(skb, gfp_mask);
+@@ -1041,9 +1039,10 @@ static int tcp_transmit_skb(struct sock
+               tcp_enter_cwr(sk);
+               err = net_xmit_eval(err);
+       }
+-      if (!err && oskb)
++      if (!err && oskb) {
+               skb_mstamp_get(&oskb->skb_mstamp);
+-
++              tcp_rate_skb_sent(sk, oskb);
++      }
+       return err;
+ }
diff --git a/queue-4.9/tcp-update-skb-skb_mstamp-more-carefully.patch b/queue-4.9/tcp-update-skb-skb_mstamp-more-carefully.patch
new file mode 100644 (file)
index 0000000..ceb0235
--- /dev/null
@@ -0,0 +1,142 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Eric Dumazet <edumazet@google.com>
+Date: Wed, 13 Sep 2017 20:30:39 -0700
+Subject: tcp: update skb->skb_mstamp more carefully
+
+From: Eric Dumazet <edumazet@google.com>
+
+
+[ Upstream commit 8c72c65b426b47b3c166a8fef0d8927fe5e8a28d ]
+
+liujian reported a problem in TCP_USER_TIMEOUT processing with a patch
+in tcp_probe_timer() :
+      https://www.spinics.net/lists/netdev/msg454496.html
+
+After investigations, the root cause of the problem is that we update
+skb->skb_mstamp of skbs in write queue, even if the attempt to send a
+clone or copy of it failed. One reason being a routing problem.
+
+This patch prevents this, solving liujian's issue.
+
+It also removes a potential RTT miscalculation, since
+__tcp_retransmit_skb() is not OR-ing TCP_SKB_CB(skb)->sacked with
+TCPCB_EVER_RETRANS if a failure happens, but skb->skb_mstamp has
+been changed.
+
+A future ACK would then lead to a very small RTT sample and min_rtt
+would then be lowered to this too small value.
+
+Tested:
+
+# cat user_timeout.pkt
+--local_ip=192.168.102.64
+
+    0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+   +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+   +0 bind(3, ..., ...) = 0
+   +0 listen(3, 1) = 0
+
+   +0 `ifconfig tun0 192.168.102.64/16; ip ro add 192.0.2.1 dev tun0`
+
+   +0 < S 0:0(0) win 0 <mss 1460>
+   +0 > S. 0:0(0) ack 1 <mss 1460>
+
+  +.1 < . 1:1(0) ack 1 win 65530
+   +0 accept(3, ..., ...) = 4
+
+   +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+   +0 write(4, ..., 24) = 24
+   +0 > P. 1:25(24) ack 1 win 29200
+   +.1 < . 1:1(0) ack 25 win 65530
+
+//change the ipaddress
+   +1 `ifconfig tun0 192.168.0.10/16`
+
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+   +1 write(4, ..., 24) = 24
+
+   +0 `ifconfig tun0 192.168.102.64/16`
+   +0 < . 1:2(1) ack 25 win 65530
+   +0 `ifconfig tun0 192.168.0.10/16`
+
+   +3 write(4, ..., 24) = -1
+
+# ./packetdrill user_timeout.pkt
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: liujian <liujian56@huawei.com>
+Acked-by: Neal Cardwell <ncardwell@google.com>
+Acked-by: Yuchung Cheng <ycheng@google.com>
+Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/tcp_output.c |   19 ++++++++++++-------
+ 1 file changed, 12 insertions(+), 7 deletions(-)
+
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -914,6 +914,7 @@ static int tcp_transmit_skb(struct sock
+       struct tcp_skb_cb *tcb;
+       struct tcp_out_options opts;
+       unsigned int tcp_options_size, tcp_header_size;
++      struct sk_buff *oskb = NULL;
+       struct tcp_md5sig_key *md5;
+       struct tcphdr *th;
+       int err;
+@@ -922,11 +923,11 @@ static int tcp_transmit_skb(struct sock
+       tp = tcp_sk(sk);
+       if (clone_it) {
+-              skb_mstamp_get(&skb->skb_mstamp);
+               TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
+                       - tp->snd_una;
+               tcp_rate_skb_sent(sk, skb);
++              oskb = skb;
+               if (unlikely(skb_cloned(skb)))
+                       skb = pskb_copy(skb, gfp_mask);
+               else
+@@ -934,6 +935,7 @@ static int tcp_transmit_skb(struct sock
+               if (unlikely(!skb))
+                       return -ENOBUFS;
+       }
++      skb_mstamp_get(&skb->skb_mstamp);
+       inet = inet_sk(sk);
+       tcb = TCP_SKB_CB(skb);
+@@ -1035,12 +1037,14 @@ static int tcp_transmit_skb(struct sock
+       err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
+-      if (likely(err <= 0))
+-              return err;
+-
+-      tcp_enter_cwr(sk);
++      if (unlikely(err > 0)) {
++              tcp_enter_cwr(sk);
++              err = net_xmit_eval(err);
++      }
++      if (!err && oskb)
++              skb_mstamp_get(&oskb->skb_mstamp);
+-      return net_xmit_eval(err);
++      return err;
+ }
+ /* This routine just queues the buffer for sending.
+@@ -2709,10 +2713,11 @@ int __tcp_retransmit_skb(struct sock *sk
+                    skb_headroom(skb) >= 0xFFFF)) {
+               struct sk_buff *nskb;
+-              skb_mstamp_get(&skb->skb_mstamp);
+               nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC);
+               err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) :
+                            -ENOBUFS;
++              if (!err)
++                      skb_mstamp_get(&skb->skb_mstamp);
+       } else {
+               err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC);
+       }
diff --git a/queue-4.9/tipc-use-only-positive-error-codes-in-messages.patch b/queue-4.9/tipc-use-only-positive-error-codes-in-messages.patch
new file mode 100644 (file)
index 0000000..1974ca1
--- /dev/null
@@ -0,0 +1,38 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Date: Fri, 29 Sep 2017 10:02:54 +0200
+Subject: tipc: use only positive error codes in messages
+
+From: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+
+
+[ Upstream commit aad06212d36cf34859428a0a279e5c14ee5c9e26 ]
+
+In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"),
+we have updated the function tipc_msg_lookup_dest() to set the error
+codes to negative values at destination lookup failures. Thus when
+the function sets the error code to -TIPC_ERR_NO_NAME, it's inserted
+into the 4 bit error field of the message header as 0xf instead of
+TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code.
+
+In this commit, we set only positive error codes.
+
+Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()")
+Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvaragan@ericsson.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/tipc/msg.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/tipc/msg.c
++++ b/net/tipc/msg.c
+@@ -547,7 +547,7 @@ bool tipc_msg_lookup_dest(struct net *ne
+               return false;
+       if (msg_errcode(msg))
+               return false;
+-      *err = -TIPC_ERR_NO_NAME;
++      *err = TIPC_ERR_NO_NAME;
+       if (skb_linearize(skb))
+               return false;
+       msg = buf_msg(skb);
diff --git a/queue-4.9/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch b/queue-4.9/tun-bail-out-from-tun_get_user-if-the-skb-is-empty.patch
new file mode 100644 (file)
index 0000000..331c16c
--- /dev/null
@@ -0,0 +1,111 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Alexander Potapenko <glider@google.com>
+Date: Thu, 28 Sep 2017 11:32:37 +0200
+Subject: tun: bail out from tun_get_user() if the skb is empty
+
+From: Alexander Potapenko <glider@google.com>
+
+
+[ Upstream commit 2580c4c17aee3ad58e9751012bad278dd074ccae ]
+
+KMSAN (https://github.com/google/kmsan) reported accessing uninitialized
+skb->data[0] in the case the skb is empty (i.e. skb->len is 0):
+
+================================================
+BUG: KMSAN: use of uninitialized memory in tun_get_user+0x19ba/0x3770
+CPU: 0 PID: 3051 Comm: probe Not tainted 4.13.0+ #3140
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
+Call Trace:
+...
+ __msan_warning_32+0x66/0xb0 mm/kmsan/kmsan_instr.c:477
+ tun_get_user+0x19ba/0x3770 drivers/net/tun.c:1301
+ tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365
+ call_write_iter ./include/linux/fs.h:1743
+ new_sync_write fs/read_write.c:457
+ __vfs_write+0x6c3/0x7f0 fs/read_write.c:470
+ vfs_write+0x3e4/0x770 fs/read_write.c:518
+ SYSC_write+0x12f/0x2b0 fs/read_write.c:565
+ SyS_write+0x55/0x80 fs/read_write.c:557
+ do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284
+ entry_SYSCALL64_slow_path+0x25/0x25 arch/x86/entry/entry_64.S:245
+...
+origin:
+...
+ kmsan_poison_shadow+0x6e/0xc0 mm/kmsan/kmsan.c:211
+ slab_alloc_node mm/slub.c:2732
+ __kmalloc_node_track_caller+0x351/0x370 mm/slub.c:4351
+ __kmalloc_reserve net/core/skbuff.c:138
+ __alloc_skb+0x26a/0x810 net/core/skbuff.c:231
+ alloc_skb ./include/linux/skbuff.h:903
+ alloc_skb_with_frags+0x1d7/0xc80 net/core/skbuff.c:4756
+ sock_alloc_send_pskb+0xabf/0xfe0 net/core/sock.c:2037
+ tun_alloc_skb drivers/net/tun.c:1144
+ tun_get_user+0x9a8/0x3770 drivers/net/tun.c:1274
+ tun_chr_write_iter+0x19f/0x300 drivers/net/tun.c:1365
+ call_write_iter ./include/linux/fs.h:1743
+ new_sync_write fs/read_write.c:457
+ __vfs_write+0x6c3/0x7f0 fs/read_write.c:470
+ vfs_write+0x3e4/0x770 fs/read_write.c:518
+ SYSC_write+0x12f/0x2b0 fs/read_write.c:565
+ SyS_write+0x55/0x80 fs/read_write.c:557
+ do_syscall_64+0x242/0x330 arch/x86/entry/common.c:284
+ return_from_SYSCALL_64+0x0/0x6a arch/x86/entry/entry_64.S:245
+================================================
+
+Make sure tun_get_user() doesn't touch skb->data[0] unless there is
+actual data.
+
+C reproducer below:
+==========================
+    // autogenerated by syzkaller (http://github.com/google/syzkaller)
+
+    #define _GNU_SOURCE
+
+    #include <fcntl.h>
+    #include <linux/if_tun.h>
+    #include <netinet/ip.h>
+    #include <net/if.h>
+    #include <string.h>
+    #include <sys/ioctl.h>
+
+    int main()
+    {
+      int sock = socket(PF_INET, SOCK_STREAM, IPPROTO_IP);
+      int tun_fd = open("/dev/net/tun", O_RDWR);
+      struct ifreq req;
+      memset(&req, 0, sizeof(struct ifreq));
+      strcpy((char*)&req.ifr_name, "gre0");
+      req.ifr_flags = IFF_UP | IFF_MULTICAST;
+      ioctl(tun_fd, TUNSETIFF, &req);
+      ioctl(sock, SIOCSIFFLAGS, "gre0");
+      write(tun_fd, "hi", 0);
+      return 0;
+    }
+==========================
+
+Signed-off-by: Alexander Potapenko <glider@google.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/net/tun.c |    8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1279,11 +1279,13 @@ static ssize_t tun_get_user(struct tun_s
+       switch (tun->flags & TUN_TYPE_MASK) {
+       case IFF_TUN:
+               if (tun->flags & IFF_NO_PI) {
+-                      switch (skb->data[0] & 0xf0) {
+-                      case 0x40:
++                      u8 ip_version = skb->len ? (skb->data[0] >> 4) : 0;
++
++                      switch (ip_version) {
++                      case 4:
+                               pi.proto = htons(ETH_P_IP);
+                               break;
+-                      case 0x60:
++                      case 6:
+                               pi.proto = htons(ETH_P_IPV6);
+                               break;
+                       default:
diff --git a/queue-4.9/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch b/queue-4.9/udpv6-fix-the-checksum-computation-when-hw-checksum-does-not-apply.patch
new file mode 100644 (file)
index 0000000..acb2d6b
--- /dev/null
@@ -0,0 +1,36 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Date: Wed, 13 Sep 2017 19:30:51 -0600
+Subject: udpv6: Fix the checksum computation when HW checksum does not apply
+
+From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+
+
+[ Upstream commit 63ecc3d9436f8012e49dc846d6cb0a85a3433517 ]
+
+While trying an ESP transport mode encryption for UDPv6 packets of
+datagram size 1436 with MTU 1500, checksum error was observed in
+the secondary fragment.
+
+This error occurs due to the UDP payload checksum being missed out
+when computing the full checksum for these packets in
+udp6_hwcsum_outgoing().
+
+Fixes: d39d938c8228 ("ipv6: Introduce udpv6_send_skb()")
+Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv6/udp.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/net/ipv6/udp.c
++++ b/net/ipv6/udp.c
+@@ -915,6 +915,7 @@ static void udp6_hwcsum_outgoing(struct
+                */
+               offset = skb_transport_offset(skb);
+               skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
++              csum = skb->csum;
+               skb->ip_summed = CHECKSUM_NONE;
diff --git a/queue-4.9/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch b/queue-4.9/vti-fix-use-after-free-in-vti_tunnel_xmit-vti6_tnl_xmit.patch
new file mode 100644 (file)
index 0000000..07dfd14
--- /dev/null
@@ -0,0 +1,98 @@
+From foo@baz Tue Oct 10 16:09:22 CEST 2017
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+Date: Tue, 26 Sep 2017 15:14:29 +0300
+Subject: vti: fix use after free in vti_tunnel_xmit/vti6_tnl_xmit
+
+From: Alexey Kodanev <alexey.kodanev@oracle.com>
+
+
+[ Upstream commit 36f6ee22d2d66046e369757ec6bbe1c482957ba6 ]
+
+When running LTP IPsec tests, KASan might report:
+
+BUG: KASAN: use-after-free in vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+Read of size 4 at addr ffff880dc6ad1980 by task swapper/0/0
+...
+Call Trace:
+  <IRQ>
+  dump_stack+0x63/0x89
+  print_address_description+0x7c/0x290
+  kasan_report+0x28d/0x370
+  ? vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+  __asan_report_load4_noabort+0x19/0x20
+  vti_tunnel_xmit+0xeee/0xff0 [ip_vti]
+  ? vti_init_net+0x190/0x190 [ip_vti]
+  ? save_stack_trace+0x1b/0x20
+  ? save_stack+0x46/0xd0
+  dev_hard_start_xmit+0x147/0x510
+  ? icmp_echo.part.24+0x1f0/0x210
+  __dev_queue_xmit+0x1394/0x1c60
+...
+Freed by task 0:
+  save_stack_trace+0x1b/0x20
+  save_stack+0x46/0xd0
+  kasan_slab_free+0x70/0xc0
+  kmem_cache_free+0x81/0x1e0
+  kfree_skbmem+0xb1/0xe0
+  kfree_skb+0x75/0x170
+  kfree_skb_list+0x3e/0x60
+  __dev_queue_xmit+0x1298/0x1c60
+  dev_queue_xmit+0x10/0x20
+  neigh_resolve_output+0x3a8/0x740
+  ip_finish_output2+0x5c0/0xe70
+  ip_finish_output+0x4ba/0x680
+  ip_output+0x1c1/0x3a0
+  xfrm_output_resume+0xc65/0x13d0
+  xfrm_output+0x1e4/0x380
+  xfrm4_output_finish+0x5c/0x70
+
+Can be fixed if we get skb->len before dst_output().
+
+Fixes: b9959fd3b0fa ("vti: switch to new ip tunnel code")
+Fixes: 22e1b23dafa8 ("vti6: Support inter address family tunneling.")
+Signed-off-by: Alexey Kodanev <alexey.kodanev@oracle.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/ip_vti.c  |    3 ++-
+ net/ipv6/ip6_vti.c |    3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/net/ipv4/ip_vti.c
++++ b/net/ipv4/ip_vti.c
+@@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+       struct ip_tunnel_parm *parms = &tunnel->parms;
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *tdev;        /* Device to other host */
++      int pkt_len = skb->len;
+       int err;
+       int mtu;
+@@ -229,7 +230,7 @@ static netdev_tx_t vti_xmit(struct sk_bu
+       err = dst_output(tunnel->net, skb->sk, skb);
+       if (net_xmit_eval(err) == 0)
+-              err = skb->len;
++              err = pkt_len;
+       iptunnel_xmit_stats(dev, err);
+       return NETDEV_TX_OK;
+--- a/net/ipv6/ip6_vti.c
++++ b/net/ipv6/ip6_vti.c
+@@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+       struct dst_entry *dst = skb_dst(skb);
+       struct net_device *tdev;
+       struct xfrm_state *x;
++      int pkt_len = skb->len;
+       int err = -1;
+       int mtu;
+@@ -498,7 +499,7 @@ vti6_xmit(struct sk_buff *skb, struct ne
+               struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats);
+               u64_stats_update_begin(&tstats->syncp);
+-              tstats->tx_bytes += skb->len;
++              tstats->tx_bytes += pkt_len;
+               tstats->tx_packets++;
+               u64_stats_update_end(&tstats->syncp);
+       } else {